merge trunk up to r1671137

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene6271@1671151 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2015-04-03 20:13:06 +00:00
commit 363fc49258
43 changed files with 2348 additions and 358 deletions

File: lucene/CHANGES.txt

@@ -40,10 +40,15 @@ API Changes

 New Features

-* LUCENE-6308: Span queries now share document conjunction/intersection
+* LUCENE-6308, LUCENE-6385, LUCENE-6391: Span queries now share
+  document conjunction/intersection
   code with boolean queries, and use two-phased iterators for
   faster intersection by avoiding loading positions in certain cases.
-  (Paul Elschot, Robert Muir via Mike McCandless)
+  (Paul Elschot, Terry Smith, Robert Muir via Mike McCandless)
+
+* LUCENE-6352: Added a new query time join to the join module that uses
+  global ordinals, which is faster for subsequent joins between reopens.
+  (Martijn van Groningen, Adrien Grand)

 Optimizations

@@ -52,6 +57,9 @@ Optimizations
   faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
   Grand, Mike McCandless)

+* LUCENE-6388: Optimize SpanNearQuery when payloads are not present.
+  (Robert Muir)
+
 Bug Fixes

 * LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.

@@ -123,6 +131,10 @@ Bug Fixes
   DocumentsWriterStallControl to prevent hangs during indexing if we
   miss a .notify/All somewhere (Mike McCandless)

+* LUCENE-6386: Correct IndexWriter.forceMerge documentation to state
+  that up to 3X (X = current index size) spare disk space may be needed
+  to complete forceMerge(1). (Robert Muir, Shai Erera, Mike McCandless)
+
 Optimizations

 * LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
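
Example (not part of the patch): the LUCENE-6352 join above is exposed through a new JoinUtil.createJoinQuery overload that takes a MultiDocValues.OrdinalMap, backed by the GlobalOrdinals* classes added further down. A minimal sketch, assuming that overload; the field names ("productId", "category") and the way the ordinal map is built here are illustrative only:

    import java.io.IOException;

    import org.apache.lucene.index.DocValues;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.MultiDocValues;
    import org.apache.lucene.index.SortedDocValues;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.join.JoinUtil;
    import org.apache.lucene.search.join.ScoreMode;
    import org.apache.lucene.util.packed.PackedInts;

    public class GlobalOrdinalJoinExample {
      static Query joinOnProductId(IndexSearcher searcher) throws IOException {
        IndexReader reader = searcher.getIndexReader();
        // Build the global ordinal map once per (re)opened reader; it can then be
        // reused for every join on this field until the next reopen, which is
        // what makes subsequent joins faster.
        SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
        for (LeafReaderContext leaf : reader.leaves()) {
          values[leaf.ord] = DocValues.getSorted(leaf.reader(), "productId");
        }
        MultiDocValues.OrdinalMap ordinalMap =
            MultiDocValues.OrdinalMap.build(reader.getCoreCacheKey(), values, PackedInts.DEFAULT);

        Query fromQuery = new TermQuery(new Term("category", "books")); // selects the "from" side
        Query toQuery = new MatchAllDocsQuery();                        // approximates the "to" side
        return JoinUtil.createJoinQuery("productId", fromQuery, toQuery,
            searcher, ScoreMode.Max, ordinalMap);
      }
    }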

File: IndexWriter.java

@@ -1547,14 +1547,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * longer be changed).</p>
    *
    * <p>Note that this requires free space that is proportional
-   * to the size of the index in your Directory (2X if you're
-   * using compound file format). For example, if your index
-   * size is 10 MB then you need an additional 10 MB free for
-   * this to complete (20 MB if you're using compound file
-   * format). This is also affected by the {@link Codec} that
-   * is used to execute the merge, and may result in even a
-   * bigger index. Also, it's best to call {@link #commit()}
-   * afterwards, to allow IndexWriter to free up disk space.</p>
+   * to the size of the index in your Directory: 2X if you are
+   * not using compound file format, and 3X if you are.
+   * For example, if your index size is 10 MB then you need
+   * an additional 20 MB free for this to complete (30 MB if
+   * you're using compound file format). This is also affected
+   * by the {@link Codec} that is used to execute the merge,
+   * and may result in even a bigger index. Also, it's best
+   * to call {@link #commit()} afterwards, to allow IndexWriter
+   * to free up disk space.</p>
    *
    * <p>If some but not all readers re-open while merging
    * is underway, this will cause {@code > 2X} temporary
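
Example (not part of the patch): the usage pattern the corrected javadoc recommends, budgeting up to 3X the index size in free disk space and committing afterwards. The path and analyzer are illustrative:

    import java.nio.file.Paths;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class ForceMergeExample {
      public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/index"))) {
          IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
          // ... add/update documents ...
          writer.forceMerge(1); // a 10 MB index may transiently need ~20-30 MB extra on disk
          writer.commit();      // lets IndexWriter delete the pre-merge segments
          writer.close();
        }
      }
    }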

File: PayloadNearQuery.java

@@ -232,8 +232,8 @@ public class PayloadNearQuery extends SpanNearQuery {
         scratch.bytes = thePayload;
         scratch.offset = 0;
         scratch.length = thePayload.length;
-        payloadScore = function.currentScore(doc, fieldName, start, end,
-            payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
+        payloadScore = function.currentScore(docID(), fieldName, start, end,
+            payloadsSeen, payloadScore, docScorer.computePayloadFactor(docID(),
             spans.startPosition(), spans.endPosition(), scratch));
         ++payloadsSeen;
       }

@@ -241,7 +241,7 @@ public class PayloadNearQuery extends SpanNearQuery {
     //
     @Override
-    protected boolean setFreqCurrentDoc() throws IOException {
+    protected void setFreqCurrentDoc() throws IOException {
       freq = 0.0f;
       payloadScore = 0;
       payloadsSeen = 0;

@@ -255,14 +255,12 @@ public class PayloadNearQuery extends SpanNearQuery {
         getPayloads(spansArr);
         startPos = spans.nextStartPosition();
       } while (startPos != Spans.NO_MORE_POSITIONS);
-      return true;
     }

     @Override
-    public float score() throws IOException {
-      return super.score()
-          * function.docScore(doc, fieldName, payloadsSeen, payloadScore);
+    public float scoreCurrentDoc() throws IOException {
+      return super.scoreCurrentDoc()
+          * function.docScore(docID(), fieldName, payloadsSeen, payloadScore);
     }
   }

File: PayloadTermQuery.java

@@ -99,7 +99,7 @@ public class PayloadTermQuery extends SpanTermQuery {
     }

     @Override
-    protected boolean setFreqCurrentDoc() throws IOException {
+    protected void setFreqCurrentDoc() throws IOException {
       freq = 0.0f;
       numMatches = 0;
       payloadScore = 0;

@@ -115,7 +115,6 @@ public class PayloadTermQuery extends SpanTermQuery {
         startPos = spans.nextStartPosition();
       } while (startPos != Spans.NO_MORE_POSITIONS);
-      return freq != 0;
     }

     protected void processPayload(Similarity similarity) throws IOException {

@@ -123,11 +122,11 @@ public class PayloadTermQuery extends SpanTermQuery {
       final PostingsEnum postings = termSpans.getPostings();
       payload = postings.getPayload();
       if (payload != null) {
-        payloadScore = function.currentScore(doc, term.field(),
+        payloadScore = function.currentScore(docID(), term.field(),
             spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
-            docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
+            docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload));
       } else {
-        payloadScore = function.currentScore(doc, term.field(),
+        payloadScore = function.currentScore(docID(), term.field(),
             spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
       }
       payloadsSeen++;

@@ -143,8 +142,7 @@ public class PayloadTermQuery extends SpanTermQuery {
      * @throws IOException if there is a low-level I/O error
      */
     @Override
-    public float score() throws IOException {
+    public float scoreCurrentDoc() throws IOException {
       return includeSpanScore ? getSpanScore() * getPayloadScore()
                               : getPayloadScore();
     }

@@ -160,7 +158,7 @@ public class PayloadTermQuery extends SpanTermQuery {
      * @see #score()
      */
     protected float getSpanScore() throws IOException {
-      return super.score();
+      return super.scoreCurrentDoc();
     }

@@ -170,7 +168,7 @@ public class PayloadTermQuery extends SpanTermQuery {
      * {@link PayloadFunction#docScore(int, String, int, float)}
      */
     protected float getPayloadScore() {
-      return function.docScore(doc, term.field(), payloadsSeen, payloadScore);
+      return function.docScore(docID(), term.field(), payloadsSeen, payloadScore);
     }
   }

File: NearSpans.java

@@ -29,11 +29,11 @@ import java.util.Objects;
  * Common super class for un/ordered Spans
  */
 abstract class NearSpans extends Spans {
-  SpanNearQuery query;
-  int allowedSlop;
-  List<Spans> subSpans; // in query order
-  DocIdSetIterator conjunction; // use to move to next doc with all clauses
+  final SpanNearQuery query;
+  final int allowedSlop;
+  final Spans[] subSpans; // in query order
+  final DocIdSetIterator conjunction; // use to move to next doc with all clauses
   boolean atFirstInCurrentDoc;
   boolean oneExhaustedInCurrentDoc; // no more results possbile in current doc

@@ -44,7 +44,7 @@ abstract class NearSpans extends Spans {
     if (subSpans.size() < 2) {
       throw new IllegalArgumentException("Less than 2 subSpans: " + query);
     }
-    this.subSpans = Objects.requireNonNull(subSpans); // in query order
+    this.subSpans = subSpans.toArray(new Spans[subSpans.size()]); // in query order
     this.conjunction = ConjunctionDISI.intersect(subSpans);
   }

@@ -91,13 +91,8 @@ abstract class NearSpans extends Spans {
     return res;
   }

-  private Spans[] subSpansArray = null; // init only when needed.
-
   public Spans[] getSubSpans() {
-    if (subSpansArray == null) {
-      subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
-    }
-    return subSpansArray;
+    return subSpans;
   }
 }

File: NearSpansOrdered.java

@@ -18,12 +18,8 @@ package org.apache.lucene.search.spans;
  */

 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Collection;
-import java.util.Set;

 /** A Spans that is formed from the ordered subspans of a SpanNearQuery
  * where the subspans do not overlap and have a maximum slop between them,

@@ -146,11 +142,11 @@ public class NearSpansOrdered extends NearSpans {
    * otherwise at least one is exhausted in the current doc.
    */
   private boolean stretchToOrder() throws IOException {
-    Spans prevSpans = subSpans.get(0);
+    Spans prevSpans = subSpans[0];
     assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
     assert prevSpans.endPosition() != NO_MORE_POSITIONS;
-    for (int i = 1; i < subSpans.size(); i++) {
-      Spans spans = subSpans.get(i);
+    for (int i = 1; i < subSpans.length; i++) {
+      Spans spans = subSpans[i];
       assert spans.startPosition() != NO_MORE_POSITIONS;
       assert spans.endPosition() != NO_MORE_POSITIONS;

@@ -169,15 +165,14 @@ public class NearSpansOrdered extends NearSpans {
    * on all subSpans, except the last one, in reverse order.
    */
   protected boolean shrinkToAfterShortestMatch() throws IOException {
-    Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
+    Spans lastSubSpans = subSpans[subSpans.length - 1];
     matchStart = lastSubSpans.startPosition();
     matchEnd = lastSubSpans.endPosition();

     int matchSlop = 0;
     int lastStart = matchStart;
-    int lastEnd = matchEnd;
-    for (int i = subSpans.size() - 2; i >= 0; i--) {
-      Spans prevSpans = subSpans.get(i);
+    for (int i = subSpans.length - 2; i >= 0; i--) {
+      Spans prevSpans = subSpans[i];

       int prevStart = prevSpans.startPosition();
       int prevEnd = prevSpans.endPosition();

@@ -206,7 +201,6 @@ public class NearSpansOrdered extends NearSpans {
        */
       matchStart = prevStart;
       lastStart = prevStart;
-      lastEnd = prevEnd;
     }

     boolean match = matchSlop <= allowedSlop;

@@ -224,16 +218,14 @@ public class NearSpansOrdered extends NearSpans {
     return atFirstInCurrentDoc ? -1 : matchEnd;
   }

-  /** Throws an UnsupportedOperationException */
   @Override
   public Collection<byte[]> getPayload() throws IOException {
-    throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
+    return null;
   }

-  /** Throws an UnsupportedOperationException */
   @Override
   public boolean isPayloadAvailable() {
-    throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
+    return false;
   }

   @Override

File: NearSpansPayloadOrdered.java

@@ -47,7 +47,7 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
    * Also collect the payloads.
    */
   protected boolean shrinkToAfterShortestMatch() throws IOException {
-    Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
+    Spans lastSubSpans = subSpans[subSpans.length - 1];
     matchStart = lastSubSpans.startPosition();
     matchEnd = lastSubSpans.endPosition();

@@ -62,9 +62,8 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
     int matchSlop = 0;
     int lastStart = matchStart;
-    int lastEnd = matchEnd;
-    for (int i = subSpans.size() - 2; i >= 0; i--) {
-      Spans prevSpans = subSpans.get(i);
+    for (int i = subSpans.length - 2; i >= 0; i--) {
+      Spans prevSpans = subSpans[i];

       if (prevSpans.isPayloadAvailable()) {
         Collection<byte[]> payload = prevSpans.getPayload();

@@ -112,7 +111,6 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
        */
       matchStart = prevStart;
       lastStart = prevStart;
-      lastEnd = prevEnd;
     }

     boolean match = matchSlop <= allowedSlop;

File: SpanNearQuery.java

@@ -18,19 +18,17 @@ package org.apache.lucene.search.spans;
  */

 import java.io.IOException;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;

 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ToStringUtils;

@@ -131,10 +129,15 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
         return null; // all required
       }
     }

+    Terms terms = context.reader().terms(field);
+    if (terms == null) {
+      return null; // field does not exist
+    }
+
     // all NearSpans require at least two subSpans
     return (! inOrder) ? new NearSpansUnordered(this, subSpans)
-        : collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
+        : collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans)
         : new NearSpansOrdered(this, subSpans);
   }

File: SpanPositionCheckQuery.java

@@ -146,7 +146,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
       startPos = in.nextStartPosition();
       assert startPos != NO_MORE_POSITIONS;
       for (;;) {
-        switch(acceptPosition(this)) {
+        switch(acceptPosition(in)) {
           case YES:
             atFirstInCurrentDoc = true;
             return in.docID();

@@ -180,7 +180,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
       if (startPos == NO_MORE_POSITIONS) {
         return NO_MORE_POSITIONS;
       }
-      switch(acceptPosition(this)) {
+      switch(acceptPosition(in)) {
         case YES:
           return startPos;
         case NO:

File: SpanScorer.java

@@ -21,48 +21,58 @@ import java.io.IOException;
 import java.util.Objects;

 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.similarities.Similarity;

 /**
  * Public for extension only.
  */
 public class SpanScorer extends Scorer {
-  protected Spans spans;
-  protected int doc;
-  protected float freq;
-  protected int numMatches;
+  /** underlying spans we are scoring from */
+  protected final Spans spans;
+  /** similarity used in default score impl */
   protected final Similarity.SimScorer docScorer;

-  protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
-  throws IOException {
+  /** accumulated sloppy freq (computed in setFreqCurrentDoc) */
+  protected float freq;
+  /** number of matches (computed in setFreqCurrentDoc) */
+  protected int numMatches;
+
+  private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
+
+  protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
     super(weight);
     this.docScorer = Objects.requireNonNull(docScorer);
     this.spans = Objects.requireNonNull(spans);
-    this.doc = -1;
   }

   @Override
-  public int nextDoc() throws IOException {
-    int prevDoc = doc;
-    doc = spans.nextDoc();
-    if (doc != NO_MORE_DOCS) {
-      setFreqCurrentDoc();
-    }
-    return doc;
+  public final int nextDoc() throws IOException {
+    return spans.nextDoc();
   }

   @Override
-  public int advance(int target) throws IOException {
-    int prevDoc = doc;
-    doc = spans.advance(target);
-    if (doc != NO_MORE_DOCS) {
-      setFreqCurrentDoc();
-    }
-    return doc;
+  public final int advance(int target) throws IOException {
+    return spans.advance(target);
+  }
+
+  /**
+   * Ensure setFreqCurrentDoc is called, if not already called for the current doc.
+   */
+  private final void ensureFreq() throws IOException {
+    int currentDoc = spans.docID();
+    if (lastScoredDoc != currentDoc) {
+      setFreqCurrentDoc();
+      lastScoredDoc = currentDoc;
+    }
   }

-  protected boolean setFreqCurrentDoc() throws IOException {
+  /**
+   * Sets {@link #freq} and {@link #numMatches} for the current document.
+   * <p>
+   * This will be called at most once per document.
+   */
+  protected void setFreqCurrentDoc() throws IOException {
     freq = 0.0f;
     numMatches = 0;

@@ -90,34 +100,46 @@ public class SpanScorer extends Scorer {
       assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
       assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
     }
-    return true;
+  }
+
+  /**
+   * Score the current doc. The default implementation scores the doc
+   * with the similarity using the slop-adjusted {@link #freq}.
+   */
+  protected float scoreCurrentDoc() throws IOException {
+    return docScorer.score(spans.docID(), freq);
   }

   @Override
-  public int docID() { return doc; }
+  public final int docID() { return spans.docID(); }

   @Override
-  public float score() throws IOException {
-    float s = docScorer.score(doc, freq);
-    return s;
+  public final float score() throws IOException {
+    ensureFreq();
+    return scoreCurrentDoc();
   }

   @Override
-  public int freq() throws IOException {
+  public final int freq() throws IOException {
+    ensureFreq();
     return numMatches;
   }

   /** Returns the intermediate "sloppy freq" adjusted for edit distance
    * @lucene.internal */
   // only public so .payloads can see it.
-  public float sloppyFreq() throws IOException {
+  public final float sloppyFreq() throws IOException {
+    ensureFreq();
     return freq;
   }

   @Override
-  public long cost() {
+  public final long cost() {
     return spans.cost();
   }
+
+  @Override
+  public final TwoPhaseIterator asTwoPhaseIterator() {
+    return spans.asTwoPhaseIterator();
+  }
 }
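
Note on the refactor above: score(), freq() and sloppyFreq() are now final and call ensureFreq() lazily, so the per-doc positions work is skipped for hits a collector never scores; subclasses now hook in via scoreCurrentDoc() instead of overriding score(). A hypothetical subclass sketch (the constant boost is illustrative, not from the patch):

    import java.io.IOException;

    import org.apache.lucene.search.similarities.Similarity;
    import org.apache.lucene.search.spans.SpanScorer;
    import org.apache.lucene.search.spans.SpanWeight;
    import org.apache.lucene.search.spans.Spans;

    // Hypothetical: doubles the similarity score of every matching doc.
    class DoublingSpanScorer extends SpanScorer {
      DoublingSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
        super(spans, weight, docScorer);
      }

      @Override
      protected float scoreCurrentDoc() throws IOException {
        // freq/numMatches are guaranteed current here: the final score()
        // calls ensureFreq() before delegating to this method.
        return 2f * super.scoreCurrentDoc();
      }
    }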

File: TestIndexWriterForceMerge.java

@@ -199,8 +199,8 @@ public class TestIndexWriterForceMerge extends LuceneTestCase {
     assertTrue("forceMerge used too much temporary space: starting usage was "
                + startDiskUsage + " bytes; final usage was " + finalDiskUsage
                + " bytes; max temp usage was " + maxDiskUsage
-               + " but should have been " + (3 * maxStartFinalDiskUsage)
-               + " (= 3X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 3 * maxStartFinalDiskUsage);
+               + " but should have been at most " + (4 * maxStartFinalDiskUsage)
+               + " (= 4X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 4 * maxStartFinalDiskUsage);

     dir.close();
   }

File: JustCompileSearchSpans.java

@@ -162,7 +162,12 @@ final class JustCompileSearchSpans {
     }

     @Override
-    protected boolean setFreqCurrentDoc() {
+    protected void setFreqCurrentDoc() {
+      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+    }
+
+    @Override
+    protected float scoreCurrentDoc() throws IOException {
       throw new UnsupportedOperationException(UNSUPPORTED_MSG);
     }
   }

File: TestSpanSearchEquivalence.java

@@ -21,6 +21,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SearchEquivalenceTestBase;
 import org.apache.lucene.search.TermQuery;

@@ -106,4 +107,122 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
     SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false);
     assertSubsetOf(q1, q2);
   }
+
+  /** SpanNearQuery([A B], N, false) ⊆ SpanNearQuery([A B], N+1, false) */
+  public void testSpanNearIncreasingSloppiness() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B C], N, false) ⊆ SpanNearQuery([A B C], N+1, false) */
+  public void testSpanNearIncreasingSloppiness3() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    Term t3 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B], N, true) ⊆ SpanNearQuery([A B], N+1, true) */
+  public void testSpanNearIncreasingOrderedSloppiness() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B C], N, true) ⊆ SpanNearQuery([A B C], N+1, true) */
+  public void testSpanNearIncreasingOrderedSloppiness3() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    Term t3 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery(A, N) ⊆ TermQuery(A) */
+  public void testSpanFirstTerm() throws Exception {
+    Term t1 = randomTerm();
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
+      Query q2 = new TermQuery(t1);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery(A, N) ⊆ SpanFirstQuery(A, N+1) */
+  public void testSpanFirstTermIncreasing() throws Exception {
+    Term t1 = randomTerm();
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
+      Query q2 = new SpanFirstQuery(new SpanTermQuery(t1), i+1);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery(A, ∞) = TermQuery(A) */
+  public void testSpanFirstTermEverything() throws Exception {
+    Term t1 = randomTerm();
+    Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), Integer.MAX_VALUE);
+    Query q2 = new TermQuery(t1);
+    assertSameSet(q1, q2);
+  }
+
+  /** SpanFirstQuery([A B], N) ⊆ SpanNearQuery([A B]) */
+  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
+  public void testSpanFirstNear() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(nearQuery, i);
+      Query q2 = nearQuery;
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery([A B], N) ⊆ SpanFirstQuery([A B], N+1) */
+  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
+  public void testSpanFirstNearIncreasing() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(nearQuery, i);
+      Query q2 = new SpanFirstQuery(nearQuery, i+1);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery([A B], ∞) = SpanNearQuery([A B]) */
+  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
+  public void testSpanFirstNearEverything() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    Query q1 = new SpanFirstQuery(nearQuery, Integer.MAX_VALUE);
+    Query q2 = nearQuery;
+    assertSameSet(q1, q2);
+  }
 }

File: WeightedSpanTermExtractor.java

@@ -306,6 +306,9 @@ public class WeightedSpanTermExtractor {
       }
       Bits acceptDocs = context.reader().getLiveDocs();
       final Spans spans = q.getSpans(context, acceptDocs, termContexts);
+      if (spans == null) {
+        return;
+      }

       // collect span positions
       while (spans.nextDoc() != Spans.NO_MORE_DOCS) {

File: MissesTest.java (new)

@@ -0,0 +1,82 @@
package org.apache.lucene.search.highlight;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.LuceneTestCase;
public class MissesTest extends LuceneTestCase {
public void testTermQuery() throws IOException, InvalidTokenOffsetsException {
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
final Query query = new TermQuery(new Term("test", "foo"));
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
assertEquals("this is a <B>foo</B> bar example",
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
}
}
public void testBooleanQuery() throws IOException, InvalidTokenOffsetsException {
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
final BooleanQuery query = new BooleanQuery();
query.add(new TermQuery(new Term("test", "foo")), Occur.MUST);
query.add(new TermQuery(new Term("test", "bar")), Occur.MUST);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
assertEquals("this is a <B>foo</B> <B>bar</B> example",
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
}
}
public void testPhraseQuery() throws IOException, InvalidTokenOffsetsException {
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
final PhraseQuery query = new PhraseQuery();
query.add(new Term("test", "foo"));
query.add(new Term("test", "bar"));
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
assertEquals("this is a <B>foo</B> <B>bar</B> example",
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
}
}
public void testSpanNearQuery() throws IOException, InvalidTokenOffsetsException {
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
final Query query = new SpanNearQuery(new SpanQuery[] {
new SpanTermQuery(new Term("test", "foo")),
new SpanTermQuery(new Term("test", "bar"))}, 0, true);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
assertEquals("this is a <B>foo</B> <B>bar</B> example",
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
}
}
}

File: ivy-versions.properties

@@ -116,9 +116,9 @@ org.apache.hadoop.version = 2.6.0

 # The httpcore version is often different from the httpclient and httpmime versions,
 # so the httpcore version value should not share the same symbolic name with them.
-/org.apache.httpcomponents/httpclient = 4.3.1
-/org.apache.httpcomponents/httpcore = 4.3
-/org.apache.httpcomponents/httpmime = 4.3.1
+/org.apache.httpcomponents/httpclient = 4.4.1
+/org.apache.httpcomponents/httpcore = 4.4.1
+/org.apache.httpcomponents/httpmime = 4.4.1

 /org.apache.ivy/ivy = 2.3.0

File: BaseGlobalOrdinalScorer.java (new)

@@ -0,0 +1,97 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.LongBitSet;
import java.io.IOException;
abstract class BaseGlobalOrdinalScorer extends Scorer {
final LongBitSet foundOrds;
final SortedDocValues values;
final Scorer approximationScorer;
float score;
public BaseGlobalOrdinalScorer(Weight weight, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
super(weight);
this.foundOrds = foundOrds;
this.values = values;
this.approximationScorer = approximationScorer;
}
@Override
public float score() throws IOException {
return score;
}
@Override
public int docID() {
return approximationScorer.docID();
}
@Override
public int nextDoc() throws IOException {
return advance(approximationScorer.docID() + 1);
}
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
final DocIdSetIterator approximation = new DocIdSetIterator() {
@Override
public int docID() {
return approximationScorer.docID();
}
@Override
public int nextDoc() throws IOException {
return approximationScorer.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return approximationScorer.advance(target);
}
@Override
public long cost() {
return approximationScorer.cost();
}
};
return createTwoPhaseIterator(approximation);
}
@Override
public long cost() {
return approximationScorer.cost();
}
@Override
public int freq() throws IOException {
return 1;
}
protected abstract TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation);
}
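
Sketch (not part of the patch) of how a consumer typically drives the two-phase iterator that BaseGlobalOrdinalScorer exposes: iterate the cheap approximation (the toQuery scorer) and only run the ordinal lookup in matches(). Assumes TwoPhaseIterator's approximation()/matches() accessors:

    import java.io.IOException;

    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.search.TwoPhaseIterator;

    final class TwoPhaseDriver {
      static void drive(Scorer scorer) throws IOException {
        // Non-null for BaseGlobalOrdinalScorer, which overrides asTwoPhaseIterator().
        TwoPhaseIterator twoPhase = scorer.asTwoPhaseIterator();
        DocIdSetIterator approximation = twoPhase.approximation();
        for (int doc = approximation.nextDoc();
             doc != DocIdSetIterator.NO_MORE_DOCS;
             doc = approximation.nextDoc()) {
          if (twoPhase.matches()) {        // confirms the ordinal is in foundOrds
            float score = scorer.score();  // scoring work happens only for real matches
          }
        }
      }
    }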

File: GlobalOrdinalsCollector.java (new)

@@ -0,0 +1,114 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import java.io.IOException;
/**
* A collector that collects all ordinals from a specified field matching the query.
*
* @lucene.experimental
*/
final class GlobalOrdinalsCollector implements Collector {
final String field;
final LongBitSet collectedOrds;
final MultiDocValues.OrdinalMap ordinalMap;
GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
this.field = field;
this.ordinalMap = ordinalMap;
this.collectedOrds = new LongBitSet(valueCount);
}
public LongBitSet getCollectorOrdinals() {
return collectedOrds;
}
@Override
public boolean needsScores() {
return false;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
if (ordinalMap != null) {
LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
} else {
return new SegmentOrdinalCollector(docTermOrds);
}
}
final class OrdinalMapCollector implements LeafCollector {
private final SortedDocValues docTermOrds;
private final LongValues segmentOrdToGlobalOrdLookup;
OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
this.docTermOrds = docTermOrds;
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
}
@Override
public void collect(int doc) throws IOException {
final long segmentOrd = docTermOrds.getOrd(doc);
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
collectedOrds.set(globalOrd);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
}
}
final class SegmentOrdinalCollector implements LeafCollector {
private final SortedDocValues docTermOrds;
SegmentOrdinalCollector(SortedDocValues docTermOrds) {
this.docTermOrds = docTermOrds;
}
@Override
public void collect(int doc) throws IOException {
final long segmentOrd = docTermOrds.getOrd(doc);
if (segmentOrd != -1) {
collectedOrds.set(segmentOrd);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
}
}
}

File: GlobalOrdinalsQuery.java (new)

@@ -0,0 +1,245 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import java.io.IOException;
import java.util.Set;
final class GlobalOrdinalsQuery extends Query {
// All the ords of matching docs found with OrdinalsCollector.
private final LongBitSet foundOrds;
private final String joinField;
private final MultiDocValues.OrdinalMap globalOrds;
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
private final Query toQuery;
// just for hashcode and equals:
private final Query fromQuery;
private final IndexReader indexReader;
GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
this.foundOrds = foundOrds;
this.joinField = joinField;
this.globalOrds = globalOrds;
this.toQuery = toQuery;
this.fromQuery = fromQuery;
this.indexReader = indexReader;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new W(this, toQuery.createWeight(searcher, false));
}
@Override
public void extractTerms(Set<Term> terms) {
fromQuery.extractTerms(terms);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
GlobalOrdinalsQuery that = (GlobalOrdinalsQuery) o;
if (!fromQuery.equals(that.fromQuery)) return false;
if (!joinField.equals(that.joinField)) return false;
if (!toQuery.equals(that.toQuery)) return false;
if (!indexReader.equals(that.indexReader)) return false;
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + joinField.hashCode();
result = 31 * result + toQuery.hashCode();
result = 31 * result + fromQuery.hashCode();
result = 31 * result + indexReader.hashCode();
return result;
}
@Override
public String toString(String field) {
return "GlobalOrdinalsQuery{" +
"joinField=" + joinField +
'}';
}
final class W extends Weight {
private final Weight approximationWeight;
private float queryNorm;
private float queryWeight;
W(Query query, Weight approximationWeight) {
super(query);
this.approximationWeight = approximationWeight;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values != null) {
int segmentOrd = values.getOrd(doc);
if (segmentOrd != -1) {
BytesRef joinValue = values.lookupOrd(segmentOrd);
return new ComplexExplanation(true, queryNorm, "Score based on join value " + joinValue.utf8ToString());
}
}
return new ComplexExplanation(false, 0.0f, "Not a match");
}
@Override
public float getValueForNormalization() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
@Override
public void normalize(float norm, float topLevelBoost) {
this.queryNorm = norm * topLevelBoost;
queryWeight *= this.queryNorm;
}
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values == null) {
return null;
}
Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
if (approximationScorer == null) {
return null;
}
if (globalOrds != null) {
return new OrdinalMapScorer(this, queryNorm, foundOrds, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
} else {
return new SegmentOrdinalScorer(this, queryNorm, foundOrds, values, approximationScorer);
}
}
}
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
final LongValues segmentOrdToGlobalOrdLookup;
public OrdinalMapScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
super(weight, foundOrds, values, approximationScorer);
this.score = score;
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
}
@Override
public int advance(int target) throws IOException {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final long segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) {
return docID;
}
}
}
return NO_MORE_DOCS;
}
@Override
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final long segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) {
return true;
}
}
return false;
}
};
}
}
final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
public SegmentOrdinalScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
super(weight, foundOrds, values, approximationScorer);
this.score = score;
}
@Override
public int advance(int target) throws IOException {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final long segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) {
return docID;
}
}
}
return NO_MORE_DOCS;
}
@Override
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final long segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) {
return true;
}
}
return false;
}
};
}
}
}

File: GlobalOrdinalsWithScoreCollector.java (new)

@@ -0,0 +1,250 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import java.io.IOException;
abstract class GlobalOrdinalsWithScoreCollector implements Collector {
final String field;
final MultiDocValues.OrdinalMap ordinalMap;
final LongBitSet collectedOrds;
protected final Scores scores;
GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
if (valueCount > Integer.MAX_VALUE) {
// We simply don't support more than Integer.MAX_VALUE ids.
throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
}
this.field = field;
this.ordinalMap = ordinalMap;
this.collectedOrds = new LongBitSet(valueCount);
this.scores = new Scores(valueCount);
}
public LongBitSet getCollectorOrdinals() {
return collectedOrds;
}
public float score(int globalOrdinal) {
return scores.getScore(globalOrdinal);
}
protected abstract void doScore(int globalOrd, float existingScore, float newScore);
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
if (ordinalMap != null) {
LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
} else {
return new SegmentOrdinalCollector(docTermOrds);
}
}
@Override
public boolean needsScores() {
return true;
}
final class OrdinalMapCollector implements LeafCollector {
private final SortedDocValues docTermOrds;
private final LongValues segmentOrdToGlobalOrdLookup;
private Scorer scorer;
OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
this.docTermOrds = docTermOrds;
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
}
@Override
public void collect(int doc) throws IOException {
final long segmentOrd = docTermOrds.getOrd(doc);
if (segmentOrd != -1) {
final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
collectedOrds.set(globalOrd);
float existingScore = scores.getScore(globalOrd);
float newScore = scorer.score();
doScore(globalOrd, existingScore, newScore);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
}
final class SegmentOrdinalCollector implements LeafCollector {
private final SortedDocValues docTermOrds;
private Scorer scorer;
SegmentOrdinalCollector(SortedDocValues docTermOrds) {
this.docTermOrds = docTermOrds;
}
@Override
public void collect(int doc) throws IOException {
final int segmentOrd = docTermOrds.getOrd(doc);
if (segmentOrd != -1) {
collectedOrds.set(segmentOrd);
float existingScore = scores.getScore(segmentOrd);
float newScore = scorer.score();
doScore(segmentOrd, existingScore, newScore);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
}
static final class Max extends GlobalOrdinalsWithScoreCollector {
public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
super(field, ordinalMap, valueCount);
}
@Override
protected void doScore(int globalOrd, float existingScore, float newScore) {
scores.setScore(globalOrd, Math.max(existingScore, newScore));
}
}
static final class Sum extends GlobalOrdinalsWithScoreCollector {
public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
super(field, ordinalMap, valueCount);
}
@Override
protected void doScore(int globalOrd, float existingScore, float newScore) {
scores.setScore(globalOrd, existingScore + newScore);
}
}
static final class Avg extends GlobalOrdinalsWithScoreCollector {
private final Occurrences occurrences;
public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
super(field, ordinalMap, valueCount);
this.occurrences = new Occurrences(valueCount);
}
@Override
protected void doScore(int globalOrd, float existingScore, float newScore) {
occurrences.increment(globalOrd);
scores.setScore(globalOrd, existingScore + newScore);
}
@Override
public float score(int globalOrdinal) {
return scores.getScore(globalOrdinal) / occurrences.getOccurence(globalOrdinal);
}
}
// Because the global ordinal is directly used as a key into the scores array, we should be somewhat smart about
// allocating it. Most of the time not all docs match, so splitting the scores array up in blocks can prevent the
// creation of huge arrays. Also, working with smaller arrays is supposed to be more GC friendly.
//
// At first a hash map implementation would make sense, but in the case that more than half of the docs match,
// this becomes more expensive than just using an array.
// Maybe this should become a method parameter?
static final int arraySize = 4096;
static final class Scores {
final float[][] blocks;
private Scores(long valueCount) {
long blockSize = valueCount + arraySize - 1;
blocks = new float[(int) ((blockSize) / arraySize)][];
}
public void setScore(int globalOrdinal, float score) {
int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize;
float[] scores = blocks[block];
if (scores == null) {
blocks[block] = scores = new float[arraySize];
}
scores[offset] = score;
}
public float getScore(int globalOrdinal) {
int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize;
float[] scores = blocks[block];
float score;
if (scores != null) {
score = scores[offset];
} else {
score = 0f;
}
return score;
}
}
static final class Occurrences {
final int[][] blocks;
private Occurrences(long valueCount) {
long blockSize = valueCount + arraySize - 1;
blocks = new int[(int) (blockSize / arraySize)][];
}
public void increment(int globalOrdinal) {
int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize;
int[] occurrences = blocks[block];
if (occurrences == null) {
blocks[block] = occurrences = new int[arraySize];
}
occurrences[offset]++;
}
public int getOccurence(int globalOrdinal) {
int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize;
int[] occurrences = blocks[block];
return occurrences[offset];
}
}
}
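
Worked example (not part of the patch) of the block/offset arithmetic that Scores and Occurrences use; only touched 4096-entry blocks are ever allocated:

    public class PagedLayoutExample {
      public static void main(String[] args) {
        final int arraySize = 4096;             // block size used by Scores/Occurrences
        int globalOrdinal = 10_000;
        int block = globalOrdinal / arraySize;  // 10_000 / 4096 = 2
        int offset = globalOrdinal % arraySize; // 10_000 - 2 * 4096 = 1808
        // The score for ordinal 10_000 lives at blocks[2][1808]; blocks for
        // untouched ordinal ranges stay null instead of one huge float[valueCount].
        System.out.println("blocks[" + block + "][" + offset + "]");
      }
    }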

File: GlobalOrdinalsWithScoreQuery.java (new)

@@ -0,0 +1,256 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import java.io.IOException;
import java.util.Set;
final class GlobalOrdinalsWithScoreQuery extends Query {
private final GlobalOrdinalsWithScoreCollector collector;
private final String joinField;
private final MultiDocValues.OrdinalMap globalOrds;
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
private final Query toQuery;
// just for hashcode and equals:
private final Query fromQuery;
private final IndexReader indexReader;
GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
this.collector = collector;
this.joinField = joinField;
this.globalOrds = globalOrds;
this.toQuery = toQuery;
this.fromQuery = fromQuery;
this.indexReader = indexReader;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new W(this, toQuery.createWeight(searcher, false));
}
@Override
public void extractTerms(Set<Term> terms) {
fromQuery.extractTerms(terms);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
GlobalOrdinalsWithScoreQuery that = (GlobalOrdinalsWithScoreQuery) o;
if (!fromQuery.equals(that.fromQuery)) return false;
if (!joinField.equals(that.joinField)) return false;
if (!toQuery.equals(that.toQuery)) return false;
if (!indexReader.equals(that.indexReader)) return false;
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + joinField.hashCode();
result = 31 * result + toQuery.hashCode();
result = 31 * result + fromQuery.hashCode();
result = 31 * result + indexReader.hashCode();
return result;
}
@Override
public String toString(String field) {
return "GlobalOrdinalsQuery{" +
"joinField=" + joinField +
'}';
}
final class W extends Weight {
private final Weight approximationWeight;
private float queryNorm;
private float queryWeight;
W(Query query, Weight approximationWeight) {
super(query);
this.approximationWeight = approximationWeight;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values != null) {
int segmentOrd = values.getOrd(doc);
if (segmentOrd != -1) {
final float score;
if (globalOrds != null) {
long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
score = collector.scores.getScore((int) globalOrd);
} else {
score = collector.score(segmentOrd);
}
BytesRef joinValue = values.lookupOrd(segmentOrd);
return new ComplexExplanation(true, score, "Score based on join value " + joinValue.utf8ToString());
}
}
return new ComplexExplanation(false, 0.0f, "Not a match");
}
@Override
public float getValueForNormalization() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
@Override
public void normalize(float norm, float topLevelBoost) {
this.queryNorm = norm * topLevelBoost;
queryWeight *= this.queryNorm;
}
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values == null) {
return null;
}
Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
if (approximationScorer == null) {
return null;
} else if (globalOrds != null) {
return new OrdinalMapScorer(this, collector, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
} else {
return new SegmentOrdinalScorer(this, collector, values, approximationScorer);
}
}
}
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
final LongValues segmentOrdToGlobalOrdLookup;
final GlobalOrdinalsWithScoreCollector collector;
public OrdinalMapScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
this.collector = collector;
}
@Override
public int advance(int target) throws IOException {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final long segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) {
score = collector.score((int) globalOrd);
return docID;
}
}
}
return NO_MORE_DOCS;
}
@Override
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final long segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) {
score = collector.score((int) globalOrd);
return true;
}
}
return false;
}
};
}
}
final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
final GlobalOrdinalsWithScoreCollector collector;
public SegmentOrdinalScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer) {
super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
this.collector = collector;
}
@Override
public int advance(int target) throws IOException {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final int segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) {
score = collector.score(segmentOrd);
return docID;
}
}
}
return NO_MORE_DOCS;
}
@Override
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final int segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) {
score = collector.score(segmentOrd);
return true;
}
}
return false;
}
};
}
}
}
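Both scorers above follow the same two-phase shape: the approximation (the toQuery scorer) cheaply proposes candidate documents, and matches() confirms a candidate only when its join-field ordinal was marked by the "from"-side collector, so the ordinal lookup is deferred until a candidate is actually inspected. A minimal sketch of that confirmation step, with hypothetical OrdLookup/OrdSet stand-ins for the SortedDocValues and collected-ordinal state:
// Hypothetical stand-ins for the per-segment state used by the scorers above.
interface OrdLookup { int getOrd(int docID); }     // like SortedDocValues.getOrd
interface OrdSet { boolean contains(long ord); }   // like the collected foundOrds
final class JoinMatchCheck {
  private final OrdLookup values;
  private final OrdSet foundOrds;
  JoinMatchCheck(OrdLookup values, OrdSet foundOrds) {
    this.values = values;
    this.foundOrds = foundOrds;
  }
  // The cheap per-candidate check behind TwoPhaseIterator.matches():
  // a doc matches only if it has a join value that the "from" side collected.
  boolean matches(int candidateDocID) {
    int segmentOrd = values.getOrd(candidateDocID);
    return segmentOrd != -1 && foundOrds.contains(segmentOrd);
  }
}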

View File

@ -17,7 +17,12 @@ package org.apache.lucene.search.join;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import java.io.IOException; import java.io.IOException;
@ -90,4 +95,78 @@ public final class JoinUtil {
} }
} }
/**
* A query time join using global ordinals over a dedicated join field.
*
* This join has certain restrictions and requirements:
* 1) A document can only refer to one other document (but can be referred to by one or more documents).
* 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
* that identifies the "from" and "to" side; the fromQuery and toQuery must then take this into account.
* 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
* should store the join values as UTF-8 strings.
* 4) An ordinal map must be provided that is created on top of the join field.
*
* @param joinField The {@link org.apache.lucene.index.SortedDocValues} field containing the join values
* @param fromQuery The query containing the actual user query. The fromQuery must match only "from" documents.
* @param toQuery The query identifying all documents on the "to" side.
* @param searcher The index searcher used to execute the from query
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
* @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
* needs to be provided.
* @return a {@link Query} instance that can be used to join documents based on the join field
* @throws IOException If I/O related errors occur
*/
public static Query createJoinQuery(String joinField,
Query fromQuery,
Query toQuery,
IndexSearcher searcher,
ScoreMode scoreMode,
MultiDocValues.OrdinalMap ordinalMap) throws IOException {
IndexReader indexReader = searcher.getIndexReader();
int numSegments = indexReader.leaves().size();
final long valueCount;
if (numSegments == 0) {
return new MatchNoDocsQuery();
} else if (numSegments == 1) {
// No need to use the ordinal map, because there is just one segment.
ordinalMap = null;
LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader();
SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField);
if (joinSortedDocValues != null) {
valueCount = joinSortedDocValues.getValueCount();
} else {
return new MatchNoDocsQuery();
}
} else {
if (ordinalMap == null) {
throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
}
valueCount = ordinalMap.getValueCount();
}
Query rewrittenFromQuery = searcher.rewrite(fromQuery);
if (scoreMode == ScoreMode.None) {
GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
searcher.search(fromQuery, globalOrdinalsCollector);
return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
}
GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector;
switch (scoreMode) {
case Total:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount);
break;
case Max:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount);
break;
case Avg:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount);
break;
default:
throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
}
searcher.search(fromQuery, globalOrdinalsWithScoreCollector);
return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
}
} }
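Putting the new API together: a caller builds the OrdinalMap once per (re)opened reader and reuses it across searches, which is what makes subsequent joins cheap. A condensed usage sketch following the tests below (field and term names are illustrative):
IndexReader reader = searcher.getIndexReader();
SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
for (LeafReaderContext leaf : reader.leaves()) {
  values[leaf.ord] = DocValues.getSorted(leaf.reader(), "join_field");
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
    reader.getCoreCacheKey(), values, PackedInts.DEFAULT);
// Join "from" documents matching the user query to all "to" documents.
Query fromQuery = new TermQuery(new Term("name", "name2"));
Query toQuery = new TermQuery(new Term("type", "to"));
Query joinQuery = JoinUtil.createJoinQuery(
    "join_field", fromQuery, toQuery, searcher, ScoreMode.None, ordinalMap);
TopDocs hits = searcher.search(joinQuery, 10);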

View File

@ -17,19 +17,6 @@ package org.apache.lucene.search.join;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -38,27 +25,29 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
@ -74,8 +63,22 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Test; import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
public class TestJoinUtil extends LuceneTestCase { public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception { public void testSimple() throws Exception {
@ -169,6 +172,180 @@ public class TestJoinUtil extends LuceneTestCase {
dir.close(); dir.close();
} }
public void testSimpleOrdinalsJoin() throws Exception {
final String idField = "id";
final String productIdField = "productId";
// A field indicating to what type a document belongs, which is then used to distinguish between documents during joining.
final String typeField = "type";
// A single sorted doc values field that holds the join values for all document types.
// Typically during indexing a schema will automatically populate this field with the join values.
final String joinField = idField + productIdField;
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
// 0
Document doc = new Document();
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
IndexReader r = indexSearcher.getIndexReader();
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
r.getCoreCacheKey(), values, PackedInts.DEFAULT
);
Query toQuery = new TermQuery(new Term(typeField, "price"));
Query fromQuery = new TermQuery(new Term("name", "name2"));
// Search for product and return prices
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(4, result.scoreDocs[0].doc);
assertEquals(5, result.scoreDocs[1].doc);
fromQuery = new TermQuery(new Term("name", "name1"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(1, result.scoreDocs[0].doc);
assertEquals(2, result.scoreDocs[1].doc);
// Search for prices and return products
fromQuery = new TermQuery(new Term("price", "20.0"));
toQuery = new TermQuery(new Term(typeField, "product"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(0, result.scoreDocs[0].doc);
assertEquals(3, result.scoreDocs[1].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
public void testRandomOrdinalsJoin() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
);
IndexIterationContext context = createContext(100, w, false, true);
w.forceMerge(1);
w.close();
IndexReader topLevelReader = DirectoryReader.open(dir);
SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
for (LeafReaderContext leafContext : topLevelReader.leaves()) {
values[leafContext.ord] = DocValues.getSorted(leafContext.reader(), "join_field");
}
context.ordinalMap = MultiDocValues.OrdinalMap.build(
topLevelReader.getCoreCacheKey(), values, PackedInts.DEFAULT
);
IndexSearcher indexSearcher = newSearcher(topLevelReader);
int r = random().nextInt(context.randomUniqueValues.length);
boolean from = context.randomFrom[r];
String randomValue = context.randomUniqueValues[r];
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
final Query actualQuery = new TermQuery(new Term("value", randomValue));
if (VERBOSE) {
System.out.println("actualQuery=" + actualQuery);
}
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
if (VERBOSE) {
System.out.println("scoreMode=" + scoreMode);
}
final Query joinQuery;
if (from) {
BooleanQuery fromQuery = new BooleanQuery();
fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "to"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
} else {
BooleanQuery fromQuery = new BooleanQuery();
fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "from"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
}
if (VERBOSE) {
System.out.println("joinQuery=" + joinQuery);
}
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
assertBitSet(expectedResult, actualResult, indexSearcher);
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
topLevelReader.close();
dir.close();
}
// TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE // TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
public void testOverflowTermsWithScoreCollector() throws Exception { public void testOverflowTermsWithScoreCollector() throws Exception {
test300spartans(true, ScoreMode.Avg); test300spartans(true, ScoreMode.Avg);
@ -218,7 +395,7 @@ public class TestJoinUtil extends LuceneTestCase {
TopDocs result = indexSearcher.search(joinQuery, 10); TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(1, result.totalHits); assertEquals(1, result.totalHits);
assertEquals(0, result.scoreDocs[0].doc); assertEquals(0, result.scoreDocs[0].doc);
indexSearcher.getIndexReader().close(); indexSearcher.getIndexReader().close();
dir.close(); dir.close();
@ -310,7 +487,7 @@ public class TestJoinUtil extends LuceneTestCase {
assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive); assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive);
} }
} }
@Override @Override
public boolean needsScores() { public boolean needsScores() {
return false; return false;
@ -448,7 +625,7 @@ public class TestJoinUtil extends LuceneTestCase {
dir, dir,
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy()) newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
); );
IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument); IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, false);
IndexReader topLevelReader = w.getReader(); IndexReader topLevelReader = w.getReader();
w.close(); w.close();
@ -485,73 +662,64 @@ public class TestJoinUtil extends LuceneTestCase {
// Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector... // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc()); final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10); final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
indexSearcher.search(joinQuery, new Collector() { indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final int docBase = context.docBase;
final LeafCollector in = topScoreDocCollector.getLeafCollector(context);
return new FilterLeafCollector(in) {
@Override
public void collect(int doc) throws IOException {
super.collect(doc);
actualResult.set(doc + docBase);
}
};
}
@Override
public boolean needsScores() {
return topScoreDocCollector.needsScores();
}
});
// Asserting bit set... // Asserting bit set...
if (VERBOSE) { assertBitSet(expectedResult, actualResult, indexSearcher);
System.out.println("expected cardinality:" + expectedResult.cardinality());
DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
System.out.println(String.format(Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
}
System.out.println("actual cardinality:" + actualResult.cardinality());
iterator = new BitSetIterator(actualResult, actualResult.cardinality());
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
System.out.println(String.format(Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
}
}
assertEquals(expectedResult, actualResult);
// Asserting TopDocs... // Asserting TopDocs...
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context); TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs(); TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits); assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
if (scoreMode == ScoreMode.None) {
continue;
}
assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
if (VERBOSE) {
System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score);
}
assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
}
} }
topLevelReader.close(); topLevelReader.close();
dir.close(); dir.close();
} }
} }
private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument) throws IOException { private void assertBitSet(BitSet expectedResult, BitSet actualResult, IndexSearcher indexSearcher) throws IOException {
return createContext(nDocs, writer, writer, multipleValuesPerDocument); if (VERBOSE) {
System.out.println("expected cardinality:" + expectedResult.cardinality());
DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
System.out.println(String.format(Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
}
System.out.println("actual cardinality:" + actualResult.cardinality());
iterator = new BitSetIterator(actualResult, actualResult.cardinality());
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
System.out.println(String.format(Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
}
}
assertEquals(expectedResult, actualResult);
} }
private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument) throws IOException { private void assertTopDocs(TopDocs expectedTopDocs, TopDocs actualTopDocs, ScoreMode scoreMode, IndexSearcher indexSearcher, Query joinQuery) throws IOException {
assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
if (scoreMode == ScoreMode.None) {
return;
}
assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
if (VERBOSE) {
System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score);
}
assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
}
}
private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument, boolean ordinalJoin) throws IOException {
return createContext(nDocs, writer, writer, multipleValuesPerDocument, ordinalJoin);
}
private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
if (globalOrdinalJoin) {
assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
}
IndexIterationContext context = new IndexIterationContext(); IndexIterationContext context = new IndexIterationContext();
int numRandomValues = nDocs / 2; int numRandomValues = nDocs / 2;
context.randomUniqueValues = new String[numRandomValues]; context.randomUniqueValues = new String[numRandomValues];
@ -560,8 +728,8 @@ public class TestJoinUtil extends LuceneTestCase {
for (int i = 0; i < numRandomValues; i++) { for (int i = 0; i < numRandomValues; i++) {
String uniqueRandomValue; String uniqueRandomValue;
do { do {
uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random()); // uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
// uniqueRandomValue = _TestUtil.randomSimpleString(random); uniqueRandomValue = TestUtil.randomSimpleString(random());
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue)); } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
// Generate unique values and empty strings aren't allowed. // Generate unique values and empty strings aren't allowed.
trackSet.add(uniqueRandomValue); trackSet.add(uniqueRandomValue);
@ -581,15 +749,18 @@ public class TestJoinUtil extends LuceneTestCase {
boolean from = context.randomFrom[randomI]; boolean from = context.randomFrom[randomI];
int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1; int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
docs[i] = new RandomDoc(id, numberOfLinkValues, value, from); docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
if (globalOrdinalJoin) {
document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
}
for (int j = 0; j < numberOfLinkValues; j++) { for (int j = 0; j < numberOfLinkValues; j++) {
String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)]; String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
docs[i].linkValues.add(linkValue); docs[i].linkValues.add(linkValue);
if (from) { if (from) {
if (!context.fromDocuments.containsKey(linkValue)) { if (!context.fromDocuments.containsKey(linkValue)) {
context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>()); context.fromDocuments.put(linkValue, new ArrayList<>());
} }
if (!context.randomValueFromDocs.containsKey(value)) { if (!context.randomValueFromDocs.containsKey(value)) {
context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>()); context.randomValueFromDocs.put(value, new ArrayList<>());
} }
context.fromDocuments.get(linkValue).add(docs[i]); context.fromDocuments.get(linkValue).add(docs[i]);
@ -600,12 +771,15 @@ public class TestJoinUtil extends LuceneTestCase {
} else { } else {
document.add(new SortedDocValuesField("from", new BytesRef(linkValue))); document.add(new SortedDocValuesField("from", new BytesRef(linkValue)));
} }
if (globalOrdinalJoin) {
document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
}
} else { } else {
if (!context.toDocuments.containsKey(linkValue)) { if (!context.toDocuments.containsKey(linkValue)) {
context.toDocuments.put(linkValue, new ArrayList<RandomDoc>()); context.toDocuments.put(linkValue, new ArrayList<>());
} }
if (!context.randomValueToDocs.containsKey(value)) { if (!context.randomValueToDocs.containsKey(value)) {
context.randomValueToDocs.put(value, new ArrayList<RandomDoc>()); context.randomValueToDocs.put(value, new ArrayList<>());
} }
context.toDocuments.get(linkValue).add(docs[i]); context.toDocuments.get(linkValue).add(docs[i]);
@ -616,6 +790,9 @@ public class TestJoinUtil extends LuceneTestCase {
} else { } else {
document.add(new SortedDocValuesField("to", new BytesRef(linkValue))); document.add(new SortedDocValuesField("to", new BytesRef(linkValue)));
} }
if (globalOrdinalJoin) {
document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
}
} }
} }
@ -707,6 +884,9 @@ public class TestJoinUtil extends LuceneTestCase {
if (joinScore == null) { if (joinScore == null) {
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore()); joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
} }
if (VERBOSE) {
System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
}
joinScore.addScore(scorer.score()); joinScore.addScore(scorer.score());
} }
@ -720,7 +900,7 @@ public class TestJoinUtil extends LuceneTestCase {
public void setScorer(Scorer scorer) { public void setScorer(Scorer scorer) {
this.scorer = scorer; this.scorer = scorer;
} }
@Override @Override
public boolean needsScores() { public boolean needsScores() {
return true; return true;
@ -777,7 +957,7 @@ public class TestJoinUtil extends LuceneTestCase {
@Override @Override
public void setScorer(Scorer scorer) {} public void setScorer(Scorer scorer) {}
@Override @Override
public boolean needsScores() { public boolean needsScores() {
return false; return false;
@ -875,6 +1055,7 @@ public class TestJoinUtil extends LuceneTestCase {
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>(); Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>(); Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();
MultiDocValues.OrdinalMap ordinalMap;
} }
private static class RandomDoc { private static class RandomDoc {
@ -922,4 +1103,29 @@ public class TestJoinUtil extends LuceneTestCase {
} }
private static class BitSetCollector extends SimpleCollector {
private final BitSet bitSet;
private int docBase;
private BitSetCollector(BitSet bitSet) {
this.bitSet = bitSet;
}
@Override
public void collect(int doc) throws IOException {
bitSet.set(docBase + doc);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public boolean needsScores() {
return false;
}
}
} }

View File

@ -1 +0,0 @@
0ec13f6423eb6d5858e229939a2bc118473ef94c

View File

@ -0,0 +1 @@
016d0bc512222f1253ee6b64d389c84e22f697f0

View File

@ -1 +0,0 @@
11393498b38e9695d0850cac26fde5613ae268b9

View File

@ -0,0 +1 @@
f5aa318bda4c6c8d688c9d00b90681dcd82ce636

View File

@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery; import org.apache.lucene.queries.TermsQuery;
@ -57,11 +56,9 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
@ -158,9 +155,10 @@ public class SuggestFieldTest extends LuceneTestCase {
weights[i] = Math.abs(random().nextLong()); weights[i] = Math.abs(random().nextLong());
document.add(newSuggestField("suggest_field", "abc", weights[i])); document.add(newSuggestField("suggest_field", "abc", weights[i]));
iw.addDocument(document); iw.addDocument(document);
}
if (rarely()) { if (usually()) {
iw.commit(); iw.commit();
}
} }
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
@ -200,11 +198,15 @@ public class SuggestFieldTest extends LuceneTestCase {
} }
iw.addDocument(document); iw.addDocument(document);
document.clear(); document.clear();
if (usually()) {
iw.commit();
}
} }
iw.deleteDocuments(new Term("str_field", "delete")); iw.deleteDocuments(new Term("str_field", "delete"));
DirectoryReader reader = DirectoryReader.open(iw, false); DirectoryReader reader = DirectoryReader.open(iw, true);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", numLive); TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", numLive);
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()])); assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
@ -224,6 +226,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newStringField("str_fld", "deleted", Field.Store.NO)); document.add(newStringField("str_fld", "deleted", Field.Store.NO));
iw.addDocument(document); iw.addDocument(document);
document.clear(); document.clear();
if (usually()) {
iw.commit();
}
} }
Filter filter = new QueryWrapperFilter(new TermsQuery("str_fld", new BytesRef("non_existent"))); Filter filter = new QueryWrapperFilter(new TermsQuery("str_fld", new BytesRef("non_existent")));
@ -249,11 +255,15 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newStringField("delete", "delete", Field.Store.NO)); document.add(newStringField("delete", "delete", Field.Store.NO));
iw.addDocument(document); iw.addDocument(document);
document.clear(); document.clear();
if (usually()) {
iw.commit();
}
} }
iw.deleteDocuments(new Term("delete", "delete")); iw.deleteDocuments(new Term("delete", "delete"));
DirectoryReader reader = DirectoryReader.open(iw, false); DirectoryReader reader = DirectoryReader.open(iw, true);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", num); TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", num);
assertThat(suggest.totalHits, equalTo(0)); assertThat(suggest.totalHits, equalTo(0));
@ -274,6 +284,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(new IntField("weight_fld", i, Field.Store.YES)); document.add(new IntField("weight_fld", i, Field.Store.YES));
iw.addDocument(document); iw.addDocument(document);
document.clear(); document.clear();
if (usually()) {
iw.commit();
}
} }
iw.deleteDocuments(NumericRangeQuery.newIntRange("weight_fld", 2, null, true, false)); iw.deleteDocuments(NumericRangeQuery.newIntRange("weight_fld", 2, null, true, false));
@ -298,6 +312,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(new IntField("filter_int_fld", i, Field.Store.NO)); document.add(new IntField("filter_int_fld", i, Field.Store.NO));
iw.addDocument(document); iw.addDocument(document);
document.clear(); document.clear();
if (usually()) {
iw.commit();
}
} }
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
@ -542,6 +560,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newSuggestField("suggest_field", suggest, weight)); document.add(newSuggestField("suggest_field", suggest, weight));
mappings.put(suggest, weight); mappings.put(suggest, weight);
iw.addDocument(document); iw.addDocument(document);
if (usually()) {
iw.commit();
}
} }
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();

View File

@ -263,6 +263,9 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase {
* Both queries will be filtered by <code>filter</code> * Both queries will be filtered by <code>filter</code>
*/ */
protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception { protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception {
QueryUtils.check(q1);
QueryUtils.check(q2);
if (filter != null) { if (filter != null) {
q1 = new FilteredQuery(q1, filter); q1 = new FilteredQuery(q1, filter);
q2 = new FilteredQuery(q2, filter); q2 = new FilteredQuery(q2, filter);

View File

@ -78,6 +78,9 @@ Detailed Change List
New Features New Features
---------------------- ----------------------
* SOLR-6637: Solr should have a way to restore a core from a backed up index.
(Varun Thacker, noble, shalin)
Bug Fixes Bug Fixes
---------------------- ----------------------
@ -90,6 +93,11 @@ Optimizations
* SOLR-7324: IndexFetcher does not need to call isIndexStale if full copy is already needed * SOLR-7324: IndexFetcher does not need to call isIndexStale if full copy is already needed
(Stephan Lagraulet via Varun Thacker) (Stephan Lagraulet via Varun Thacker)
Other Changes
----------------------
* SOLR-6865: Upgrade HttpClient to 4.4.1 (Shawn Heisey)
================== 5.1.0 ================== ================== 5.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@ -29,6 +29,7 @@ import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.NoSuchFileException; import java.nio.file.NoSuchFileException;
import java.nio.file.Paths;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -246,31 +247,31 @@ public class IndexFetcher {
} }
} }
boolean fetchLatestIndex(final SolrCore core, boolean forceReplication) throws IOException, InterruptedException { boolean fetchLatestIndex(boolean forceReplication) throws IOException, InterruptedException {
return fetchLatestIndex(core, forceReplication, false); return fetchLatestIndex(forceReplication, false);
} }
/** /**
* This command downloads all the necessary files from master to install an index commit point. Only changed files are * This command downloads all the necessary files from master to install an index commit point. Only changed files are
* downloaded. It also downloads the conf files (if they are modified). * downloaded. It also downloads the conf files (if they are modified).
* *
* @param core the SolrCore
* @param forceReplication force a replication in all cases * @param forceReplication force a replication in all cases
* @param forceCoreReload force a core reload in all cases * @param forceCoreReload force a core reload in all cases
* @return true on success, false if slave is already in sync * @return true on success, false if slave is already in sync
* @throws IOException if an exception occurs * @throws IOException if an exception occurs
*/ */
boolean fetchLatestIndex(final SolrCore core, boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException { boolean fetchLatestIndex(boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {
boolean cleanupDone = false; boolean cleanupDone = false;
boolean successfulInstall = false; boolean successfulInstall = false;
replicationStartTime = System.currentTimeMillis(); replicationStartTime = System.currentTimeMillis();
Directory tmpIndexDir = null; Directory tmpIndexDir = null;
String tmpIndex = null; String tmpIndex;
Directory indexDir = null; Directory indexDir = null;
String indexDirPath = null; String indexDirPath;
boolean deleteTmpIdxDir = true; boolean deleteTmpIdxDir = true;
if (!core.getSolrCoreState().getLastReplicateIndexSuccess()) { if (!solrCore.getSolrCoreState().getLastReplicateIndexSuccess()) {
// if the last replication was not a success, we force a full replication // if the last replication was not a success, we force a full replication
// when we are a bit more confident we may want to try a partial replication // when we are a bit more confident we may want to try a partial replication
// if the error is connection related or something, but we have to be careful // if the error is connection related or something, but we have to be careful
@ -279,7 +280,7 @@ public class IndexFetcher {
try { try {
//get the current 'replicateable' index version in the master //get the current 'replicateable' index version in the master
NamedList response = null; NamedList response;
try { try {
response = getLatestVersion(); response = getLatestVersion();
} catch (Exception e) { } catch (Exception e) {
@ -290,12 +291,12 @@ public class IndexFetcher {
long latestGeneration = (Long) response.get(GENERATION); long latestGeneration = (Long) response.get(GENERATION);
// TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes) // TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
IndexCommit commit = core.getDeletionPolicy().getLatestCommit(); IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit();
if (commit == null) { if (commit == null) {
// Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points // Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points
RefCounted<SolrIndexSearcher> searcherRefCounted = null; RefCounted<SolrIndexSearcher> searcherRefCounted = null;
try { try {
searcherRefCounted = core.getNewestSearcher(false); searcherRefCounted = solrCore.getNewestSearcher(false);
if (searcherRefCounted == null) { if (searcherRefCounted == null) {
LOG.warn("No open searcher found - fetch aborted"); LOG.warn("No open searcher found - fetch aborted");
return false; return false;
@ -312,15 +313,14 @@ public class IndexFetcher {
if (forceReplication && commit.getGeneration() != 0) { if (forceReplication && commit.getGeneration() != 0) {
// since we won't get the files for an empty index, // since we won't get the files for an empty index,
// we just clear ours and commit // we just clear ours and commit
RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core); RefCounted<IndexWriter> iw = solrCore.getUpdateHandler().getSolrCoreState().getIndexWriter(solrCore);
try { try {
iw.get().deleteAll(); iw.get().deleteAll();
} finally { } finally {
iw.decref(); iw.decref();
} }
SolrQueryRequest req = new LocalSolrQueryRequest(core, SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams());
new ModifiableSolrParams()); solrCore.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
} }
//there is nothing to be replicated //there is nothing to be replicated
@ -340,7 +340,9 @@ public class IndexFetcher {
// get the list of files first // get the list of files first
fetchFileList(latestGeneration); fetchFileList(latestGeneration);
// this can happen if the commit point is deleted before we fetch the file list. // this can happen if the commit point is deleted before we fetch the file list.
if(filesToDownload.isEmpty()) return false; if (filesToDownload.isEmpty()) {
return false;
}
LOG.info("Number of files in latest index in master: " + filesToDownload.size()); LOG.info("Number of files in latest index in master: " + filesToDownload.size());
// Create the sync service // Create the sync service
@ -354,13 +356,13 @@ public class IndexFetcher {
|| commit.getGeneration() >= latestGeneration || forceReplication; || commit.getGeneration() >= latestGeneration || forceReplication;
String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date()); String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
tmpIndex = createTempindexDir(core, tmpIdxDirName); tmpIndex = Paths.get(solrCore.getDataDir(), tmpIdxDirName).toString();
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
// cindex dir... // cindex dir...
indexDirPath = core.getIndexDir(); indexDirPath = solrCore.getIndexDir();
indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); indexDir = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
try { try {
@ -404,7 +406,7 @@ public class IndexFetcher {
} finally { } finally {
writer.decref(); writer.decref();
} }
solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true); solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(solrCore, true);
} }
boolean reloadCore = false; boolean reloadCore = false;
@ -422,7 +424,7 @@ public class IndexFetcher {
reloadCore = true; reloadCore = true;
downloadConfFiles(confFilesToDownload, latestGeneration); downloadConfFiles(confFilesToDownload, latestGeneration);
if (isFullCopyNeeded) { if (isFullCopyNeeded) {
successfulInstall = modifyIndexProps(tmpIdxDirName); successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
deleteTmpIdxDir = false; deleteTmpIdxDir = false;
} else { } else {
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir); successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
@ -433,8 +435,8 @@ public class IndexFetcher {
// may be closed // may be closed
if (indexDir != null) { if (indexDir != null) {
LOG.info("removing old index directory " + indexDir); LOG.info("removing old index directory " + indexDir);
core.getDirectoryFactory().doneWithDirectory(indexDir); solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
core.getDirectoryFactory().remove(indexDir); solrCore.getDirectoryFactory().remove(indexDir);
} }
} }
@ -446,7 +448,7 @@ public class IndexFetcher {
} else { } else {
terminateAndWaitFsyncService(); terminateAndWaitFsyncService();
if (isFullCopyNeeded) { if (isFullCopyNeeded) {
successfulInstall = modifyIndexProps(tmpIdxDirName); successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
deleteTmpIdxDir = false; deleteTmpIdxDir = false;
} else { } else {
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir); successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
@ -458,13 +460,13 @@ public class IndexFetcher {
} }
} finally { } finally {
if (!isFullCopyNeeded) { if (!isFullCopyNeeded) {
solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core); solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(solrCore);
} }
} }
// we must reload the core after we open the IW back up // we must reload the core after we open the IW back up
if (successfulInstall && (reloadCore || forceCoreReload)) { if (successfulInstall && (reloadCore || forceCoreReload)) {
LOG.info("Reloading SolrCore {}", core.getName()); LOG.info("Reloading SolrCore {}", solrCore.getName());
reloadCore(); reloadCore();
} }
@ -474,8 +476,8 @@ public class IndexFetcher {
// may be closed // may be closed
if (indexDir != null) { if (indexDir != null) {
LOG.info("removing old index directory " + indexDir); LOG.info("removing old index directory " + indexDir);
core.getDirectoryFactory().doneWithDirectory(indexDir); solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
core.getDirectoryFactory().remove(indexDir); solrCore.getDirectoryFactory().remove(indexDir);
} }
} }
if (isFullCopyNeeded) { if (isFullCopyNeeded) {
@ -486,13 +488,13 @@ public class IndexFetcher {
} }
if (!isFullCopyNeeded && !forceReplication && !successfulInstall) { if (!isFullCopyNeeded && !forceReplication && !successfulInstall) {
cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall); cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
cleanupDone = true; cleanupDone = true;
// we try with a full copy of the index // we try with a full copy of the index
LOG.warn( LOG.warn(
"Replication attempt was not successful - trying a full index replication reloadCore={}", "Replication attempt was not successful - trying a full index replication reloadCore={}",
reloadCore); reloadCore);
successfulInstall = fetchLatestIndex(core, true, reloadCore); successfulInstall = fetchLatestIndex(true, reloadCore);
} }
replicationStartTime = 0; replicationStartTime = 0;
@ -505,15 +507,15 @@ public class IndexFetcher {
} catch (InterruptedException e) { } catch (InterruptedException e) {
throw new InterruptedException("Index fetch interrupted"); throw new InterruptedException("Index fetch interrupted");
} catch (Exception e) { } catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e); throw new SolrException(ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
} }
} finally { } finally {
if (!cleanupDone) { if (!cleanupDone) {
cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall); cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
} }
} }
} }
private void cleanup(final SolrCore core, Directory tmpIndexDir, private void cleanup(final SolrCore core, Directory tmpIndexDir,
Directory indexDir, boolean deleteTmpIdxDir, boolean successfulInstall) throws IOException { Directory indexDir, boolean deleteTmpIdxDir, boolean successfulInstall) throws IOException {
try { try {
@ -524,9 +526,9 @@ public class IndexFetcher {
LOG.error("caught", e); LOG.error("caught", e);
} }
} }
core.getUpdateHandler().getSolrCoreState().setLastReplicateIndexSuccess(successfulInstall); core.getUpdateHandler().getSolrCoreState().setLastReplicateIndexSuccess(successfulInstall);
filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = null; filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = null;
replicationStartTime = 0; replicationStartTime = 0;
dirFileFetcher = null; dirFileFetcher = null;
@ -545,11 +547,11 @@ public class IndexFetcher {
SolrException.log(LOG, "Error removing directory " + tmpIndexDir, e); SolrException.log(LOG, "Error removing directory " + tmpIndexDir, e);
} }
} }
if (tmpIndexDir != null) { if (tmpIndexDir != null) {
core.getDirectoryFactory().release(tmpIndexDir); core.getDirectoryFactory().release(tmpIndexDir);
} }
if (indexDir != null) { if (indexDir != null) {
core.getDirectoryFactory().release(indexDir); core.getDirectoryFactory().release(indexDir);
} }
@ -719,15 +721,6 @@ public class IndexFetcher {
} }
/**
* All the files are copied to a temp dir first
*/
private String createTempindexDir(SolrCore core, String tmpIdxDirName) {
// TODO: there should probably be a DirectoryFactory#concatPath(parent, name)
// or something
return core.getDataDir() + tmpIdxDirName;
}
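The helper removed above concatenated the data dir and the temp dir name as plain strings; the replacement uses Paths.get, which joins segments with the platform separator regardless of trailing slashes. A small illustration (directory names are hypothetical):
import java.nio.file.Paths;
public class PathJoinDemo {
  public static void main(String[] args) {
    // Plain concatenation only works if the left side happens to end with a separator.
    String dataDir = "/var/solr/data/"; // hypothetical; getDataDir() may or may not end with '/'
    System.out.println(dataDir + "index.20150403");           // /var/solr/data/index.20150403
    // Paths.get normalizes separators either way.
    System.out.println(Paths.get(dataDir, "index.20150403")); // /var/solr/data/index.20150403
    System.out.println(Paths.get("/var/solr/data", "index.20150403"));
  }
}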
private void reloadCore() { private void reloadCore() {
final CountDownLatch latch = new CountDownLatch(1); final CountDownLatch latch = new CountDownLatch(1);
new Thread() { new Thread() {
@ -815,12 +808,12 @@ public class IndexFetcher {
|| filename.startsWith("segments_") || size < _100K); || filename.startsWith("segments_") || size < _100K);
} }
static class CompareResult { protected static class CompareResult {
boolean equal = false; boolean equal = false;
boolean checkSummed = false; boolean checkSummed = false;
} }
private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) { protected static CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
CompareResult compareResult = new CompareResult(); CompareResult compareResult = new CompareResult();
try { try {
try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) { try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
@ -887,8 +880,8 @@ public class IndexFetcher {
} }
/** /**
* All the files which are common between master and slave must have the same size else we assume they are * All the files which are common between master and slave must have the same size and the same checksum else we assume
* not compatible (stale). * they are not compatible (stale).
* *
* @return true if the index is stale and we need to download a fresh copy, false otherwise. * @return true if the index is stale and we need to download a fresh copy, false otherwise.
* @throws IOException if a low-level I/O error occurs * @throws IOException if a low-level I/O error occurs
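With compareFile now protected static, classes in the same handler package (such as RestoreCore, added below) can reuse this size-plus-checksum staleness check. A minimal sketch, assuming hypothetical file name, length and checksum values that would normally come from the master's file list or a backup:
IndexFetcher.CompareResult result = IndexFetcher.compareFile(indexDir, "_0.cfs", 1024L, 123456789L);
if (result.equal) {
// same length and checksum: the local copy of the file can be kept
}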
@ -1034,7 +1027,7 @@ public class IndexFetcher {
/** /**
* If the index is stale, load the index from a different dir in the data dir. * If the index is stale, load the index from a different dir in the data dir.
*/ */
private boolean modifyIndexProps(String tmpIdxDirName) { protected static boolean modifyIndexProps(SolrCore solrCore, String tmpIdxDirName) {
LOG.info("New index installed. Updating index properties... index="+tmpIdxDirName); LOG.info("New index installed. Updating index properties... index="+tmpIdxDirName);
Properties p = new Properties(); Properties p = new Properties();
Directory dir = null; Directory dir = null;
@ -1042,7 +1035,7 @@ public class IndexFetcher {
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType); dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType);
if (slowFileExists(dir, IndexFetcher.INDEX_PROPERTIES)){ if (slowFileExists(dir, IndexFetcher.INDEX_PROPERTIES)){
final IndexInput input = dir.openInput(IndexFetcher.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE); final IndexInput input = dir.openInput(IndexFetcher.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);
final InputStream is = new PropertiesInputStream(input); final InputStream is = new PropertiesInputStream(input);
try { try {
p.load(new InputStreamReader(is, StandardCharsets.UTF_8)); p.load(new InputStreamReader(is, StandardCharsets.UTF_8));
@ -1083,7 +1076,7 @@ public class IndexFetcher {
} }
} }
} }
} }
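For context, modifyIndexProps rewrites index.properties in the data dir so that the core picks up the newly installed index directory on the next writer/searcher open. After a restore the file would hold a single entry along these lines (directory name hypothetical):
index=restore.snapshot.mybackup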
private final Map<String, FileInfo> confFileInfoCache = new HashMap<>(); private final Map<String, FileInfo> confFileInfoCache = new HashMap<>();
View File
@ -0,0 +1,50 @@
package org.apache.solr.handler;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
class OldBackupDirectory implements Comparable<OldBackupDirectory> {
File dir;
Date timestamp;
private final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
OldBackupDirectory(File dir) {
if(dir.isDirectory()) {
Matcher m = dirNamePattern.matcher(dir.getName());
if(m.find()) {
try {
this.dir = dir;
this.timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).parse(m.group(1));
} catch(Exception e) {
this.dir = null;
this.timestamp = null;
}
}
}
}
@Override
public int compareTo(OldBackupDirectory that) {
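// Arguments are intentionally reversed so that sorting puts the newest snapshot first.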
return that.timestamp.compareTo(this.timestamp);
}
}
View File
@ -36,7 +36,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
@ -146,6 +148,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
private ReentrantLock indexFetchLock = new ReentrantLock(); private ReentrantLock indexFetchLock = new ReentrantLock();
private ExecutorService restoreExecutor = Executors.newSingleThreadExecutor(
new DefaultSolrThreadFactory("restoreExecutor"));
private volatile Future<Boolean> restoreFuture;
private volatile String currentRestoreName;
private String includeConfFiles; private String includeConfFiles;
private NamedList<String> confFileNameAlias = new NamedList<>(); private NamedList<String> confFileNameAlias = new NamedList<>();
@ -205,13 +214,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
// It gives the current 'replicatable' index version // It gives the current 'replicatable' index version
if (command.equals(CMD_INDEX_VERSION)) { if (command.equals(CMD_INDEX_VERSION)) {
IndexCommit commitPoint = indexCommitPoint; // make a copy so it won't change IndexCommit commitPoint = indexCommitPoint; // make a copy so it won't change
if (commitPoint == null) { if (commitPoint == null) {
// if this handler is 'lazy', we may not have tracked the last commit // if this handler is 'lazy', we may not have tracked the last commit
// because our commit listener is registered on inform // because our commit listener is registered on inform
commitPoint = core.getDeletionPolicy().getLatestCommit(); commitPoint = core.getDeletionPolicy().getLatestCommit();
} }
if (commitPoint != null && replicationEnabled.get()) { if (commitPoint != null && replicationEnabled.get()) {
// //
// There is a race condition here. The commit point may be changed / deleted by the time // There is a race condition here. The commit point may be changed / deleted by the time
@ -235,6 +244,11 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} else if (command.equalsIgnoreCase(CMD_BACKUP)) { } else if (command.equalsIgnoreCase(CMD_BACKUP)) {
doSnapShoot(new ModifiableSolrParams(solrParams), rsp, req); doSnapShoot(new ModifiableSolrParams(solrParams), rsp, req);
rsp.add(STATUS, OK_STATUS); rsp.add(STATUS, OK_STATUS);
} else if (command.equalsIgnoreCase(CMD_RESTORE)) {
restore(new ModifiableSolrParams(solrParams), rsp, req);
rsp.add(STATUS, OK_STATUS);
} else if (command.equalsIgnoreCase(CMD_RESTORE_STATUS)) {
rsp.add(CMD_RESTORE_STATUS, getRestoreStatus());
} else if (command.equalsIgnoreCase(CMD_DELETE_BACKUP)) { } else if (command.equalsIgnoreCase(CMD_DELETE_BACKUP)) {
deleteSnapshot(new ModifiableSolrParams(solrParams)); deleteSnapshot(new ModifiableSolrParams(solrParams));
rsp.add(STATUS, OK_STATUS); rsp.add(STATUS, OK_STATUS);
@ -302,7 +316,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name"); throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
} }
SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME)); SnapShooter snapShooter = new SnapShooter(core, params.get(LOCATION), params.get(NAME));
snapShooter.validateDeleteSnapshot(); snapShooter.validateDeleteSnapshot();
snapShooter.deleteSnapAsync(this); snapShooter.deleteSnapAsync(this);
} }
@ -361,7 +375,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} else { } else {
currentIndexFetcher = pollingIndexFetcher; currentIndexFetcher = pollingIndexFetcher;
} }
return currentIndexFetcher.fetchLatestIndex(core, forceReplication); return currentIndexFetcher.fetchLatestIndex(forceReplication);
} catch (Exception e) { } catch (Exception e) {
SolrException.log(LOG, "Index fetch failed ", e); SolrException.log(LOG, "Index fetch failed ", e);
} finally { } finally {
@ -377,6 +391,72 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
return indexFetchLock.isLocked(); return indexFetchLock.isLocked();
} }
private void restore(SolrParams params, SolrQueryResponse rsp, SolrQueryRequest req) {
if (restoreFuture != null && !restoreFuture.isDone()) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Restore in progress. Cannot run multiple restore operations" +
"for the same core");
}
String name = params.get(NAME);
String location = params.get(LOCATION);
//If location is not provided then assume that the restore index is present inside the data directory.
if (location == null) {
location = core.getDataDir();
}
//If name is not provided then look for the latest unnamed snapshot folder (the ones with the
//snapshot.<timestamp> format), since we allow snapshots to be taken without providing a name. Pick the latest timestamp.
if (name == null) {
File[] files = new File(location).listFiles();
List<OldBackupDirectory> dirs = new ArrayList<>();
for (File f : files) {
OldBackupDirectory obd = new OldBackupDirectory(f);
if (obd.dir != null) {
dirs.add(obd);
}
}
Collections.sort(dirs);
if (dirs.size() == 0) {
throw new SolrException(ErrorCode.BAD_REQUEST, "No backup name specified and none found in " + core.getDataDir());
}
name = dirs.get(0).dir.getName();
} else {
//"snapshot." is prefixed by snapshooter
name = "snapshot." + name;
}
RestoreCore restoreCore = new RestoreCore(core, location, name);
restoreFuture = restoreExecutor.submit(restoreCore);
currentRestoreName = name;
}
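Both new commands ride on the existing replication endpoint; a hedged example of driving them over HTTP (host, port and core name hypothetical, both name and location optional):
http://localhost:8983/solr/collection1/replication?command=restore&name=mybackup&location=/path/to/backups
http://localhost:8983/solr/collection1/replication?command=restorestatus
The name param is passed without the snapshot. prefix; restore() prepends it to match the directory layout created by SnapShooter.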
private NamedList<Object> getRestoreStatus() {
NamedList<Object> status = new SimpleOrderedMap<>();
if (restoreFuture == null) {
status.add(STATUS, "No restore actions in progress");
return status;
}
status.add("snapshotName", currentRestoreName);
if (restoreFuture.isDone()) {
try {
boolean success = restoreFuture.get();
if (success) {
status.add(STATUS, SUCCESS);
} else {
status.add(STATUS, FAILED);
}
} catch (Exception e) {
status.add(STATUS, FAILED);
status.add(EXCEPTION, e.getMessage());
}
} else {
status.add(STATUS, "In Progress");
}
return status;
}
private void doSnapShoot(SolrParams params, SolrQueryResponse rsp, private void doSnapShoot(SolrParams params, SolrQueryResponse rsp,
SolrQueryRequest req) { SolrQueryRequest req) {
try { try {
@ -391,19 +471,19 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
if (numberToKeep < 1) { if (numberToKeep < 1) {
numberToKeep = Integer.MAX_VALUE; numberToKeep = Integer.MAX_VALUE;
} }
IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy(); IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy();
IndexCommit indexCommit = delPolicy.getLatestCommit(); IndexCommit indexCommit = delPolicy.getLatestCommit();
if (indexCommit == null) { if (indexCommit == null) {
indexCommit = req.getSearcher().getIndexReader().getIndexCommit(); indexCommit = req.getSearcher().getIndexReader().getIndexCommit();
} }
// small race here before the commit point is saved // small race here before the commit point is saved
SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME)); SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME));
snapShooter.validateCreateSnapshot(); snapShooter.validateCreateSnapshot();
snapShooter.createSnapAsync(indexCommit, numberToKeep, this); snapShooter.createSnapAsync(indexCommit, numberToKeep, this);
} catch (Exception e) { } catch (Exception e) {
LOG.warn("Exception during creating a snapshot", e); LOG.warn("Exception during creating a snapshot", e);
rsp.add("exception", e); rsp.add("exception", e);
@ -420,7 +500,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
private void getFileStream(SolrParams solrParams, SolrQueryResponse rsp) { private void getFileStream(SolrParams solrParams, SolrQueryResponse rsp) {
ModifiableSolrParams rawParams = new ModifiableSolrParams(solrParams); ModifiableSolrParams rawParams = new ModifiableSolrParams(solrParams);
rawParams.set(CommonParams.WT, FILE_STREAM); rawParams.set(CommonParams.WT, FILE_STREAM);
String cfileName = solrParams.get(CONF_FILE_SHORT); String cfileName = solrParams.get(CONF_FILE_SHORT);
if (cfileName != null) { if (cfileName != null) {
rsp.add(FILE_STREAM, new LocalFsFileStream(solrParams)); rsp.add(FILE_STREAM, new LocalFsFileStream(solrParams));
@ -438,7 +518,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} }
long gen = Long.parseLong(v); long gen = Long.parseLong(v);
IndexCommit commit = core.getDeletionPolicy().getCommitPoint(gen); IndexCommit commit = core.getDeletionPolicy().getCommitPoint(gen);
//System.out.println("ask for files for gen:" + commit.getGeneration() + core.getCoreDescriptor().getCoreContainer().getZkController().getNodeName()); //System.out.println("ask for files for gen:" + commit.getGeneration() + core.getCoreDescriptor().getCoreContainer().getZkController().getNodeName());
if (commit == null) { if (commit == null) {
rsp.add("status", "invalid index generation"); rsp.add("status", "invalid index generation");
@ -456,7 +536,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
Map<String,Object> fileMeta = new HashMap<>(); Map<String,Object> fileMeta = new HashMap<>();
fileMeta.put(NAME, file); fileMeta.put(NAME, file);
fileMeta.put(SIZE, dir.fileLength(file)); fileMeta.put(SIZE, dir.fileLength(file));
try (final IndexInput in = dir.openInput(file, IOContext.READONCE)) { try (final IndexInput in = dir.openInput(file, IOContext.READONCE)) {
try { try {
long checksum = CodecUtil.retrieveChecksum(in); long checksum = CodecUtil.retrieveChecksum(in);
@ -465,13 +545,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
LOG.warn("Could not read checksum from index file: " + file, e); LOG.warn("Could not read checksum from index file: " + file, e);
} }
} }
result.add(fileMeta); result.add(fileMeta);
} }
} }
// add the segments_N file // add the segments_N file
Map<String,Object> fileMeta = new HashMap<>(); Map<String,Object> fileMeta = new HashMap<>();
fileMeta.put(NAME, infos.getSegmentsFileName()); fileMeta.put(NAME, infos.getSegmentsFileName());
fileMeta.put(SIZE, dir.fileLength(infos.getSegmentsFileName())); fileMeta.put(SIZE, dir.fileLength(infos.getSegmentsFileName()));
@ -487,7 +567,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
result.add(fileMeta); result.add(fileMeta);
} catch (IOException e) { } catch (IOException e) {
rsp.add("status", "unable to get file names for given index generation"); rsp.add("status", "unable to get file names for given index generation");
rsp.add("exception", e); rsp.add(EXCEPTION, e);
LOG.error("Unable to get file names for indexCommit generation: " + gen, e); LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
} finally { } finally {
if (dir != null) { if (dir != null) {
@ -613,7 +693,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
return "ReplicationHandler provides replication of index and configuration files from Master to Slaves"; return "ReplicationHandler provides replication of index and configuration files from Master to Slaves";
} }
/** /**
* returns the CommitVersionInfo for the current searcher, or null on error. * returns the CommitVersionInfo for the current searcher, or null on error.
*/ */
private CommitVersionInfo getIndexVersion() { private CommitVersionInfo getIndexVersion() {
@ -694,7 +774,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
CommitVersionInfo vInfo = getIndexVersion(); CommitVersionInfo vInfo = getIndexVersion();
details.add("indexVersion", null == vInfo ? 0 : vInfo.version); details.add("indexVersion", null == vInfo ? 0 : vInfo.version);
details.add(GENERATION, null == vInfo ? 0 : vInfo.generation); details.add(GENERATION, null == vInfo ? 0 : vInfo.generation);
IndexCommit commit = indexCommitPoint; // make a copy so it won't change IndexCommit commit = indexCommitPoint; // make a copy so it won't change
if (isMaster) { if (isMaster) {
@ -832,11 +912,11 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
details.add("master", master); details.add("master", master);
if (slave.size() > 0) if (slave.size() > 0)
details.add("slave", slave); details.add("slave", slave);
NamedList snapshotStats = snapShootDetails; NamedList snapshotStats = snapShootDetails;
if (snapshotStats != null) if (snapshotStats != null)
details.add(CMD_BACKUP, snapshotStats); details.add(CMD_BACKUP, snapshotStats);
return details; return details;
} }
@ -971,12 +1051,12 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
Boolean.toString(enableMaster) + " and slave setting is " + Boolean.toString(enableSlave)); Boolean.toString(enableMaster) + " and slave setting is " + Boolean.toString(enableSlave));
} }
} }
if (!enableSlave && !enableMaster) { if (!enableSlave && !enableMaster) {
enableMaster = true; enableMaster = true;
master = new NamedList<>(); master = new NamedList<>();
} }
if (enableMaster) { if (enableMaster) {
includeConfFiles = (String) master.get(CONF_FILES); includeConfFiles = (String) master.get(CONF_FILES);
if (includeConfFiles != null && includeConfFiles.trim().length() > 0) { if (includeConfFiles != null && includeConfFiles.trim().length() > 0) {
@ -999,7 +1079,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
if (!replicateOnCommit && ! replicateOnOptimize) { if (!replicateOnCommit && ! replicateOnOptimize) {
replicateOnCommit = true; replicateOnCommit = true;
} }
// if we only want to replicate on optimize, we need the deletion policy to // if we only want to replicate on optimize, we need the deletion policy to
// save the last optimized commit point. // save the last optimized commit point.
if (replicateOnOptimize) { if (replicateOnOptimize) {
@ -1068,7 +1148,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
isMaster = true; isMaster = true;
} }
} }
// check master or slave is enabled // check master or slave is enabled
private boolean isEnabled( NamedList params ){ private boolean isEnabled( NamedList params ){
if( params == null ) return false; if( params == null ) return false;
@ -1106,6 +1186,19 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
@Override @Override
public void postClose(SolrCore core) {} public void postClose(SolrCore core) {}
}); });
core.addCloseHook(new CloseHook() {
@Override
public void preClose(SolrCore core) {
ExecutorUtil.shutdownNowAndAwaitTermination(restoreExecutor);
if (restoreFuture != null) {
restoreFuture.cancel(true);
}
}
@Override
public void postClose(SolrCore core) {}
});
} }
/** /**
@ -1407,6 +1500,14 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
return result; return result;
} }
private static final String LOCATION = "location";
private static final String SUCCESS = "success";
private static final String FAILED = "failed";
private static final String EXCEPTION = "exception";
public static final String MASTER_URL = "masterUrl"; public static final String MASTER_URL = "masterUrl";
public static final String STATUS = "status"; public static final String STATUS = "status";
@ -1417,6 +1518,10 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
public static final String CMD_BACKUP = "backup"; public static final String CMD_BACKUP = "backup";
public static final String CMD_RESTORE = "restore";
public static final String CMD_RESTORE_STATUS = "restorestatus";
public static final String CMD_FETCH_INDEX = "fetchindex"; public static final String CMD_FETCH_INDEX = "fetchindex";
public static final String CMD_ABORT_FETCH = "abortfetch"; public static final String CMD_ABORT_FETCH = "abortfetch";
View File
@ -0,0 +1,149 @@
package org.apache.solr.handler;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class RestoreCore implements Callable<Boolean> {
private static final Logger log = LoggerFactory.getLogger(RestoreCore.class.getName());
private final String backupName;
private final String backupLocation;
private final SolrCore core;
public RestoreCore(SolrCore core, String location, String name) {
this.core = core;
this.backupLocation = location;
this.backupName = name;
}
@Override
public Boolean call() throws Exception {
return doRestore();
}
private boolean doRestore() throws Exception {
Path backupPath = Paths.get(backupLocation, backupName);
String restoreIndexName = "restore." + backupName;
Path restoreIndexPath = Paths.get(core.getDataDir(), restoreIndexName);
Directory restoreIndexDir = null;
Directory indexDir = null;
try (Directory backupDir = FSDirectory.open(backupPath)) {
restoreIndexDir = core.getDirectoryFactory().get(restoreIndexPath.toString(),
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Prefer local copy.
indexDir = core.getDirectoryFactory().get(core.getIndexDir(),
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Move all files from backupDir to restoreIndexDir
for (String filename : backupDir.listAll()) {
checkInterrupted();
log.info("Copying over file to restore directory " + filename);
try (IndexInput indexInput = backupDir.openInput(filename, IOContext.READONCE)) {
long checksum = CodecUtil.retrieveChecksum(indexInput);
long length = indexInput.length();
IndexFetcher.CompareResult compareResult = IndexFetcher.compareFile(indexDir, filename, length, checksum);
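//Copy from the backup when the local file differs, or when it could not be
//checksummed and is one of the commit-critical files (.si, .liv, segments_N)
//that must match the backup's commit point exactly.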
if (!compareResult.equal || (!compareResult.checkSummed && (filename.endsWith(".si")
|| filename.endsWith(".liv") || filename.startsWith("segments_")))) {
restoreIndexDir.copyFrom(backupDir, filename, filename, IOContext.READONCE);
} else {
//prefer local copy
restoreIndexDir.copyFrom(indexDir, filename, filename, IOContext.READONCE);
}
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
}
}
log.debug("Switching directories");
IndexFetcher.modifyIndexProps(core, restoreIndexName);
boolean success;
try {
core.getUpdateHandler().newIndexWriter(false);
openNewSearcher();
success = true;
log.info("Successfully restored to the backup index");
} catch (Exception e) {
//Rollback to the old index directory. Delete the restore index directory and mark the restore as failed.
log.info("Could not switch to restored index. Rolling back to the current index");
Directory dir = null;
try {
dir = core.getDirectoryFactory().get(core.getDataDir(), DirectoryFactory.DirContext.META_DATA,
core.getSolrConfig().indexConfig.lockType);
dir.deleteFile(IndexFetcher.INDEX_PROPERTIES);
} finally {
if (dir != null) {
core.getDirectoryFactory().release(dir);
}
}
core.getDirectoryFactory().doneWithDirectory(restoreIndexDir);
core.getDirectoryFactory().remove(restoreIndexDir);
core.getUpdateHandler().newIndexWriter(false);
openNewSearcher();
throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
}
if (success) {
core.getDirectoryFactory().doneWithDirectory(indexDir);
core.getDirectoryFactory().remove(indexDir);
}
return true;
} finally {
if (restoreIndexDir != null) {
core.getDirectoryFactory().release(restoreIndexDir);
}
if (indexDir != null) {
core.getDirectoryFactory().release(indexDir);
}
}
}
private void checkInterrupted() throws InterruptedException {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("Stopping restore process. Thread was interrupted.");
}
}
private void openNewSearcher() throws Exception {
Future[] waitSearcher = new Future[1];
core.getSearcher(true, false, waitSearcher, true);
if (waitSearcher[0] != null) {
waitSearcher[0].get();
}
}
}
View File
@ -18,6 +18,7 @@ package org.apache.solr.handler;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Paths;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
@ -58,10 +59,11 @@ public class SnapShooter {
public SnapShooter(SolrCore core, String location, String snapshotName) { public SnapShooter(SolrCore core, String location, String snapshotName) {
solrCore = core; solrCore = core;
if (location == null) snapDir = core.getDataDir(); if (location == null) {
snapDir = core.getDataDir();
}
else { else {
File base = new File(core.getCoreDescriptor().getInstanceDir()); snapDir = Paths.get(core.getCoreDescriptor().getInstanceDir()).resolve(location).toAbsolutePath().toString();
snapDir = org.apache.solr.util.FileUtils.resolvePath(base, location).getAbsolutePath();
} }
this.snapshotName = snapshotName; this.snapshotName = snapshotName;
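The switch from FileUtils.resolvePath to Path.resolve above keeps the old semantics: a relative location is resolved against the instance dir, while an absolute one is used as-is. A small sketch with hypothetical paths:
Path base = Paths.get("/var/solr/collection1"); // instance dir
base.resolve("backups"); // -> /var/solr/collection1/backups
base.resolve("/mnt/backups"); // -> /mnt/backups (an absolute location wins)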
@ -125,7 +127,7 @@ public class SnapShooter {
} }
void createSnapshot(final IndexCommit indexCommit, ReplicationHandler replicationHandler) { void createSnapshot(final IndexCommit indexCommit, ReplicationHandler replicationHandler) {
LOG.info("Creating backup snapshot..."); LOG.info("Creating backup snapshot " + (snapshotName == null ? "<not named>" : snapshotName));
NamedList<Object> details = new NamedList<>(); NamedList<Object> details = new NamedList<>();
details.add("startTime", new Date().toString()); details.add("startTime", new Date().toString());
try { try {
@ -193,31 +195,6 @@ public class SnapShooter {
replicationHandler.snapShootDetails = details; replicationHandler.snapShootDetails = details;
} }
private class OldBackupDirectory implements Comparable<OldBackupDirectory>{
File dir;
Date timestamp;
final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
OldBackupDirectory(File dir) {
if(dir.isDirectory()) {
Matcher m = dirNamePattern.matcher(dir.getName());
if(m.find()) {
try {
this.dir = dir;
this.timestamp = new SimpleDateFormat(DATE_FMT, Locale.ROOT).parse(m.group(1));
} catch(Exception e) {
this.dir = null;
this.timestamp = null;
}
}
}
}
@Override
public int compareTo(OldBackupDirectory that) {
return that.timestamp.compareTo(this.timestamp);
}
}
public static final String DATE_FMT = "yyyyMMddHHmmssSSS"; public static final String DATE_FMT = "yyyyMMddHHmmssSSS";
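For reference, DATE_FMT ties SnapShooter, which uses it to name unnamed snapshots, to OldBackupDirectory, which parses those names back into timestamps. A sketch of the round trip:
SimpleDateFormat fmt = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT);
String dirName = "snapshot." + fmt.format(new Date()); // e.g. snapshot.20150403201306123
// OldBackupDirectory's pattern ^snapshot[.](.*)$ recovers the timestamp from such a name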
View File
@ -121,7 +121,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
@Test @Test
public void testBackupOnCommit() throws Exception { public void testBackupOnCommit() throws Exception {
//Index //Index
int nDocs = indexDocs(); int nDocs = indexDocs(masterClient);
//Confirm if completed //Confirm if completed
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient); CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient);
@ -146,7 +146,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
} }
} }
private int indexDocs() throws IOException, SolrServerException { protected static int indexDocs(SolrClient masterClient) throws IOException, SolrServerException {
int nDocs = TestUtil.nextInt(random(), 1, 100); int nDocs = TestUtil.nextInt(random(), 1, 100);
masterClient.deleteByQuery("*:*"); masterClient.deleteByQuery("*:*");
for (int i = 0; i < nDocs; i++) { for (int i = 0; i < nDocs; i++) {
@ -164,7 +164,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
@Test @Test
public void doTestBackup() throws Exception { public void doTestBackup() throws Exception {
int nDocs = indexDocs(); int nDocs = indexDocs(masterClient);
Path[] snapDir = new Path[5]; //One extra for the backup on commit Path[] snapDir = new Path[5]; //One extra for the backup on commit
//First snapshot location //First snapshot location
@ -180,18 +180,17 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
backupNames = new String[4]; backupNames = new String[4];
} }
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
BackupCommand backupCommand;
final String backupName = TestUtil.randomSimpleString(random(), 1, 20); final String backupName = TestUtil.randomSimpleString(random(), 1, 20);
if (!namedBackup) { if (!namedBackup) {
backupCommand = new BackupCommand(addNumberToKeepInRequest, backupKeepParamName, ReplicationHandler.CMD_BACKUP); if (addNumberToKeepInRequest) {
runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&" + backupKeepParamName + "=2");
} else {
runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "");
}
} else { } else {
backupCommand = new BackupCommand(backupName, ReplicationHandler.CMD_BACKUP); runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&name=" + backupName);
backupNames[i] = backupName; backupNames[i] = backupName;
} }
backupCommand.runCommand();
if (backupCommand.fail != null) {
fail(backupCommand.fail);
}
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, firstBackupTimestamp); CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, firstBackupTimestamp);
while (!checkBackupStatus.success) { while (!checkBackupStatus.success) {
@ -253,8 +252,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
private void testDeleteNamedBackup(String backupNames[]) throws InterruptedException, IOException { private void testDeleteNamedBackup(String backupNames[]) throws InterruptedException, IOException {
String lastTimestamp = null; String lastTimestamp = null;
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
BackupCommand deleteBackupCommand = new BackupCommand(backupNames[i], ReplicationHandler.CMD_DELETE_BACKUP); runBackupCommand(masterJetty, ReplicationHandler.CMD_DELETE_BACKUP, "&name=" + backupNames[i]);
deleteBackupCommand.runCommand();
CheckDeleteBackupStatus checkDeleteBackupStatus = new CheckDeleteBackupStatus(backupNames[i], lastTimestamp); CheckDeleteBackupStatus checkDeleteBackupStatus = new CheckDeleteBackupStatus(backupNames[i], lastTimestamp);
while (true) { while (true) {
boolean success = checkDeleteBackupStatus.fetchStatus(); boolean success = checkDeleteBackupStatus.fetchStatus();
@ -267,52 +265,19 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
} }
Thread.sleep(200); Thread.sleep(200);
} }
if (deleteBackupCommand.fail != null) {
fail(deleteBackupCommand.fail);
}
} }
} }
private class BackupCommand { public static void runBackupCommand(JettySolrRunner masterJetty, String cmd, String params) throws IOException {
String fail = null; String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME
final boolean addNumberToKeepInRequest; + "/replication?command=" + cmd + params;
String backupKeepParamName; InputStream stream = null;
String backupName; try {
String cmd; URL url = new URL(masterUrl);
stream = url.openStream();
BackupCommand(boolean addNumberToKeepInRequest, String backupKeepParamName, String command) { stream.close();
this.addNumberToKeepInRequest = addNumberToKeepInRequest; } finally {
this.backupKeepParamName = backupKeepParamName; IOUtils.closeQuietly(stream);
this.cmd = command;
}
BackupCommand(String backupName, String command) {
this.backupName = backupName;
addNumberToKeepInRequest = false;
this.cmd = command;
}
public void runCommand() {
String masterUrl;
if(backupName != null) {
masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd +
"&name=" + backupName;
} else {
masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd +
(addNumberToKeepInRequest ? "&" + backupKeepParamName + "=2" : "");
}
InputStream stream = null;
try {
URL url = new URL(masterUrl);
stream = url.openStream();
stream.close();
} catch (Exception e) {
fail = e.getMessage();
} finally {
IOUtils.closeQuietly(stream);
}
} }
} }
@ -349,6 +314,6 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
IOUtils.closeQuietly(stream); IOUtils.closeQuietly(stream);
} }
return false; return false;
}; }
} }
} }
View File
@ -0,0 +1,243 @@
package org.apache.solr.handler;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrJettyTestBase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.util.FileUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@SolrTestCaseJ4.SuppressSSL // Currently unknown why SSL does not work with this test
public class TestRestoreCore extends SolrJettyTestBase {
JettySolrRunner masterJetty;
TestReplicationHandler.SolrInstance master = null;
SolrClient masterClient;
private static final String CONF_DIR = "solr" + File.separator + "collection1" + File.separator + "conf"
+ File.separator;
private static String context = "/solr";
private static JettySolrRunner createJetty(TestReplicationHandler.SolrInstance instance) throws Exception {
FileUtils.copyFile(new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"), new File(instance.getHomeDir(), "solr.xml"));
JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), "/solr", 0);
jetty.setDataDir(instance.getDataDir());
jetty.start();
return jetty;
}
private static SolrClient createNewSolrClient(int port) {
try {
// setup the client...
HttpSolrClient client = new HttpSolrClient(buildUrl(port, context) + "/" + DEFAULT_TEST_CORENAME);
client.setConnectionTimeout(15000);
client.setSoTimeout(60000);
client.setDefaultMaxConnectionsPerHost(100);
client.setMaxTotalConnections(100);
return client;
}
catch (Exception ex) {
throw new RuntimeException(ex);
}
}
@Before
public void setUp() throws Exception {
super.setUp();
String configFile = "solrconfig-master.xml";
master = new TestReplicationHandler.SolrInstance(createTempDir("solr-instance").toFile(), "master", null);
master.setUp();
master.copyConfigFile(CONF_DIR + configFile, "solrconfig.xml");
masterJetty = createJetty(master);
masterClient = createNewSolrClient(masterJetty.getLocalPort());
}
@Override
@After
public void tearDown() throws Exception {
super.tearDown();
masterClient.close();
masterClient = null;
masterJetty.stop();
masterJetty = null;
master = null;
}
@Test
public void testSimpleRestore() throws Exception {
int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
String snapshotName;
String location;
String params = "";
//Use the default backup location or an externally provided location.
if (random().nextBoolean()) {
location = createTempDir().toFile().getAbsolutePath();
params += "&location=" + URLEncoder.encode(location, "UTF-8");
}
//named snapshot vs default snapshot name
if (random().nextBoolean()) {
snapshotName = TestUtil.randomSimpleString(random(), 1, 5);
params += "&name=" + snapshotName;
}
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params);
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null);
while (!checkBackupStatus.success) {
checkBackupStatus.fetchStatus();
Thread.sleep(1000);
}
//Modify existing index before we call restore.
//Delete a few docs
int numDeletes = TestUtil.nextInt(random(), 1, nDocs);
for(int i=0; i<numDeletes; i++) {
masterClient.deleteByQuery("id:" + i);
}
masterClient.commit();
//Add a few more
int moreAdds = TestUtil.nextInt(random(), 1, 100);
for (int i=0; i<moreAdds; i++) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", i + nDocs);
doc.addField("name", "name = " + (i + nDocs));
masterClient.add(doc);
}
//Purposely skip the commit once in a while, so that some docs may be left uncommitted
if (usually()) {
masterClient.commit();
}
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_RESTORE, params);
while (!fetchRestoreStatus()) {
Thread.sleep(1000);
}
//See if restore was successful by checking if all the docs are present again
verifyDocs(nDocs);
}
@Test
public void testFailedRestore() throws Exception {
int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
String location = createTempDir().toFile().getAbsolutePath();
String snapshotName = TestUtil.randomSimpleString(random(), 1, 5);
String params = "&name=" + snapshotName + "&location=" + URLEncoder.encode(location, "UTF-8");
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params);
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null);
while (!checkBackupStatus.success) {
checkBackupStatus.fetchStatus();
Thread.sleep(1000);
}
//Remove the segments_n file so that the backup index is corrupted.
//Restore should fail and it should automatically rollback to the original index.
Path restoreIndexPath = Paths.get(location, "snapshot." + snapshotName);
Path segmentFileName = Files.newDirectoryStream(restoreIndexPath, IndexFileNames.SEGMENTS + "*").iterator().next();
Files.delete(segmentFileName);
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_RESTORE, params);
try {
while (!fetchRestoreStatus()) {
Thread.sleep(1000);
}
fail("Should have thrown an error because restore could not have been successful");
} catch (AssertionError e) {
//supposed to happen
}
verifyDocs(nDocs);
//make sure we can write to the index again
nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
verifyDocs(nDocs);
}
private void verifyDocs(int nDocs) throws SolrServerException, IOException {
ModifiableSolrParams queryParams = new ModifiableSolrParams();
queryParams.set("q", "*:*");
QueryResponse response = masterClient.query(queryParams);
assertEquals(0, response.getStatus());
assertEquals(nDocs, response.getResults().getNumFound());
}
private boolean fetchRestoreStatus() throws IOException {
String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME +
"/replication?command=" + ReplicationHandler.CMD_RESTORE_STATUS;
final Pattern pException = Pattern.compile("<str name=\"exception\">(.*?)</str>");
InputStream stream = null;
try {
URL url = new URL(masterUrl);
stream = url.openStream();
String response = IOUtils.toString(stream, "UTF-8");
if(pException.matcher(response).find()) {
fail("Failed to complete restore action");
}
if(response.contains("<str name=\"status\">success</str>")) {
return true;
} else if (response.contains("<str name=\"status\">failed</str>")){
fail("Restore Failed");
}
stream.close();
} finally {
IOUtils.closeQuietly(stream);
}
return false;
}
}
View File
@ -1 +0,0 @@
0ec13f6423eb6d5858e229939a2bc118473ef94c
View File
@ -0,0 +1 @@
016d0bc512222f1253ee6b64d389c84e22f697f0
View File
@ -1 +0,0 @@
11393498b38e9695d0850cac26fde5613ae268b9
View File
@ -0,0 +1 @@
f5aa318bda4c6c8d688c9d00b90681dcd82ce636
View File
@ -1 +0,0 @@
f7899276dddd01d8a42ecfe27e7031fcf9824422
View File
@ -0,0 +1 @@
2f8757f5ac5e38f46c794e5229d1f3c522e9b1df