merge trunk up to r1671137

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene6271@1671151 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2015-04-03 20:13:06 +00:00
commit 363fc49258
43 changed files with 2348 additions and 358 deletions

View File

@@ -40,10 +40,15 @@ API Changes
New Features
* LUCENE-6308: Span queries now share document conjunction/intersection
* LUCENE-6308, LUCENE-6385, LUCENE-6391: Span queries now share
document conjunction/intersection
code with boolean queries, and use two-phased iterators for
faster intersection by avoiding loading positions in certain cases.
(Paul Elschot, Robert Muir via Mike McCandless)
(Paul Elschot, Terry Smith, Robert Muir via Mike McCandless)
* LUCENE-6352: Added a new query time join to the join module that uses
global ordinals, which is faster for subsequent joins between reopens.
(Martijn van Groningen, Adrien Grand)
Optimizations
@@ -52,6 +57,9 @@ Optimizations
faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
Grand, Mike McCandless)
* LUCENE-6388: Optimize SpanNearQuery when payloads are not present.
(Robert Muir)
Bug Fixes
* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.
@@ -123,6 +131,10 @@ Bug Fixes
DocumentsWriterStallControl to prevent hangs during indexing if we
miss a .notify/All somewhere (Mike McCandless)
* LUCENE-6386: Correct IndexWriter.forceMerge documentation to state
that up to 3X (X = current index size) spare disk space may be needed
to complete forceMerge(1). (Robert Muir, Shai Erera, Mike McCandless)
Optimizations
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
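As context for the two-phased iterators mentioned in the LUCENE-6308 entry above, here is a minimal sketch (not part of this commit) of the pattern: a cheap approximation iterator proposes candidate docs, and matches() confirms them, so positions are loaded only for docs that survive the first phase. The position check below is a hypothetical placeholder.

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;

final class ExampleTwoPhase extends TwoPhaseIterator {
  ExampleTwoPhase(DocIdSetIterator approximation) {
    super(approximation); // phase one: cheap doc-id conjunction, no positions
  }

  @Override
  public boolean matches() throws IOException {
    // phase two: runs only for docs the approximation proposed;
    // this is where positions would actually be read.
    return expensivePositionCheck(approximation.docID());
  }

  private boolean expensivePositionCheck(int doc) {
    return true; // hypothetical placeholder for a real position check
  }
}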

View File

@@ -1547,14 +1547,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
* longer be changed).</p>
*
* <p>Note that this requires free space that is proportional
* to the size of the index in your Directory (2X if you're
* using compound file format). For example, if your index
* size is 10 MB then you need an additional 10 MB free for
* this to complete (20 MB if you're using compound file
* format). This is also affected by the {@link Codec} that
* is used to execute the merge, and may result in even a
* bigger index. Also, it's best to call {@link #commit()}
* afterwards, to allow IndexWriter to free up disk space.</p>
* to the size of the index in your Directory: 2X if you are
* not using compound file format, and 3X if you are.
* For example, if your index size is 10 MB then you need
* an additional 20 MB free for this to complete (30 MB if
* you're using compound file format). This is also affected
* by the {@link Codec} that is used to execute the merge,
* and may result in even a bigger index. Also, it's best
* to call {@link #commit()} afterwards, to allow IndexWriter
* to free up disk space.</p>
*
* <p>If some but not all readers re-open while merging
* is underway, this will cause {@code > 2X} temporary
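To make the revised estimate concrete, here is a minimal hedged sketch of the pattern the javadoc recommends: force-merge, then commit so IndexWriter can release the space held by the merged-away segments. The path and analyzer are illustrative placeholders.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ForceMergeExample {
  public static void main(String[] args) throws Exception {
    // Per the javadoc above: a 10 MB index may transiently need ~20 MB of
    // extra space (2X) without compound files, ~30 MB (3X) with them.
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      writer.forceMerge(1); // merge down to a single segment
      writer.commit();      // lets IndexWriter free the old segments' disk space
    }
  }
}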

View File

@@ -232,8 +232,8 @@ public class PayloadNearQuery extends SpanNearQuery {
scratch.bytes = thePayload;
scratch.offset = 0;
scratch.length = thePayload.length;
payloadScore = function.currentScore(doc, fieldName, start, end,
payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc,
payloadScore = function.currentScore(docID(), fieldName, start, end,
payloadsSeen, payloadScore, docScorer.computePayloadFactor(docID(),
spans.startPosition(), spans.endPosition(), scratch));
++payloadsSeen;
}
@@ -241,7 +241,7 @@ public class PayloadNearQuery extends SpanNearQuery {
//
@Override
protected boolean setFreqCurrentDoc() throws IOException {
protected void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
@@ -255,14 +255,12 @@ public class PayloadNearQuery extends SpanNearQuery {
getPayloads(spansArr);
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
return true;
}
@Override
public float score() throws IOException {
return super.score()
* function.docScore(doc, fieldName, payloadsSeen, payloadScore);
public float scoreCurrentDoc() throws IOException {
return super.scoreCurrentDoc()
* function.docScore(docID(), fieldName, payloadsSeen, payloadScore);
}
}

View File

@@ -99,7 +99,7 @@ public class PayloadTermQuery extends SpanTermQuery {
}
@Override
protected boolean setFreqCurrentDoc() throws IOException {
protected void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;
payloadScore = 0;
@@ -115,7 +115,6 @@ public class PayloadTermQuery extends SpanTermQuery {
startPos = spans.nextStartPosition();
} while (startPos != Spans.NO_MORE_POSITIONS);
return freq != 0;
}
protected void processPayload(Similarity similarity) throws IOException {
@@ -123,11 +122,11 @@ public class PayloadTermQuery extends SpanTermQuery {
final PostingsEnum postings = termSpans.getPostings();
payload = postings.getPayload();
if (payload != null) {
payloadScore = function.currentScore(doc, term.field(),
payloadScore = function.currentScore(docID(), term.field(),
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore,
docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload));
docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload));
} else {
payloadScore = function.currentScore(doc, term.field(),
payloadScore = function.currentScore(docID(), term.field(),
spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F);
}
payloadsSeen++;
@@ -143,8 +142,7 @@ public class PayloadTermQuery extends SpanTermQuery {
* @throws IOException if there is a low-level I/O error
*/
@Override
public float score() throws IOException {
public float scoreCurrentDoc() throws IOException {
return includeSpanScore ? getSpanScore() * getPayloadScore()
: getPayloadScore();
}
@@ -160,7 +158,7 @@ public class PayloadTermQuery extends SpanTermQuery {
* @see #score()
*/
protected float getSpanScore() throws IOException {
return super.score();
return super.scoreCurrentDoc();
}
/**
@@ -170,7 +168,7 @@ public class PayloadTermQuery extends SpanTermQuery {
* {@link PayloadFunction#docScore(int, String, int, float)}
*/
protected float getPayloadScore() {
return function.docScore(doc, term.field(), payloadsSeen, payloadScore);
return function.docScore(docID(), term.field(), payloadsSeen, payloadScore);
}
}

View File

@@ -29,11 +29,11 @@ import java.util.Objects;
* Common super class for un/ordered Spans
*/
abstract class NearSpans extends Spans {
SpanNearQuery query;
int allowedSlop;
final SpanNearQuery query;
final int allowedSlop;
List<Spans> subSpans; // in query order
DocIdSetIterator conjunction; // use to move to next doc with all clauses
final Spans[] subSpans; // in query order
final DocIdSetIterator conjunction; // use to move to next doc with all clauses
boolean atFirstInCurrentDoc;
boolean oneExhaustedInCurrentDoc; // no more results possible in current doc
@@ -44,7 +44,7 @@ abstract class NearSpans extends Spans {
if (subSpans.size() < 2) {
throw new IllegalArgumentException("Less than 2 subSpans: " + query);
}
this.subSpans = Objects.requireNonNull(subSpans); // in query order
this.subSpans = subSpans.toArray(new Spans[subSpans.size()]); // in query order
this.conjunction = ConjunctionDISI.intersect(subSpans);
}
@@ -91,13 +91,8 @@ abstract class NearSpans extends Spans {
return res;
}
private Spans[] subSpansArray = null; // init only when needed.
public Spans[] getSubSpans() {
if (subSpansArray == null) {
subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
}
return subSpansArray;
return subSpans;
}
}

View File

@@ -18,12 +18,8 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
import java.util.Set;
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
* where the subspans do not overlap and have a maximum slop between them,
@@ -146,11 +142,11 @@ public class NearSpansOrdered extends NearSpans {
* otherwise at least one is exhausted in the current doc.
*/
private boolean stretchToOrder() throws IOException {
Spans prevSpans = subSpans.get(0);
Spans prevSpans = subSpans[0];
assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
assert prevSpans.endPosition() != NO_MORE_POSITIONS;
for (int i = 1; i < subSpans.size(); i++) {
Spans spans = subSpans.get(i);
for (int i = 1; i < subSpans.length; i++) {
Spans spans = subSpans[i];
assert spans.startPosition() != NO_MORE_POSITIONS;
assert spans.endPosition() != NO_MORE_POSITIONS;
@@ -169,15 +165,14 @@ public class NearSpansOrdered extends NearSpans {
* on all subSpans, except the last one, in reverse order.
*/
protected boolean shrinkToAfterShortestMatch() throws IOException {
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
Spans lastSubSpans = subSpans[subSpans.length - 1];
matchStart = lastSubSpans.startPosition();
matchEnd = lastSubSpans.endPosition();
int matchSlop = 0;
int lastStart = matchStart;
int lastEnd = matchEnd;
for (int i = subSpans.size() - 2; i >= 0; i--) {
Spans prevSpans = subSpans.get(i);
for (int i = subSpans.length - 2; i >= 0; i--) {
Spans prevSpans = subSpans[i];
int prevStart = prevSpans.startPosition();
int prevEnd = prevSpans.endPosition();
@@ -206,7 +201,6 @@
*/
matchStart = prevStart;
lastStart = prevStart;
lastEnd = prevEnd;
}
boolean match = matchSlop <= allowedSlop;
@@ -224,16 +218,14 @@
return atFirstInCurrentDoc ? -1 : matchEnd;
}
/** Throws an UnsupportedOperationException */
@Override
public Collection<byte[]> getPayload() throws IOException {
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
return null;
}
/** Throws an UnsupportedOperationException */
@Override
public boolean isPayloadAvailable() {
throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
return false;
}
@Override

View File

@@ -47,7 +47,7 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
* Also collect the payloads.
*/
protected boolean shrinkToAfterShortestMatch() throws IOException {
Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
Spans lastSubSpans = subSpans[subSpans.length - 1];
matchStart = lastSubSpans.startPosition();
matchEnd = lastSubSpans.endPosition();
@@ -62,9 +62,8 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
int matchSlop = 0;
int lastStart = matchStart;
int lastEnd = matchEnd;
for (int i = subSpans.size() - 2; i >= 0; i--) {
Spans prevSpans = subSpans.get(i);
for (int i = subSpans.length - 2; i >= 0; i--) {
Spans prevSpans = subSpans[i];
if (prevSpans.isPayloadAvailable()) {
Collection<byte[]> payload = prevSpans.getPayload();
@@ -112,7 +111,6 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
*/
matchStart = prevStart;
lastStart = prevStart;
lastEnd = prevEnd;
}
boolean match = matchSlop <= allowedSlop;

View File

@@ -18,19 +18,17 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@@ -132,9 +130,14 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
}
}
Terms terms = context.reader().terms(field);
if (terms == null) {
return null; // field does not exist
}
// all NearSpans require at least two subSpans
return (! inOrder) ? new NearSpansUnordered(this, subSpans)
: collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
: collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans)
: new NearSpansOrdered(this, subSpans);
}
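A short hedged usage sketch of what this hunk changes: even with collectPayloads requested, getSpans() now returns the plain NearSpansOrdered when the field's terms report no payloads (the LUCENE-6388 optimization), and null when the field does not exist. The field and term names below are hypothetical.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

public class PayloadAwareSpanNear {
  public static SpanNearQuery build() {
    SpanQuery[] clauses = new SpanQuery[] {
        new SpanTermQuery(new Term("body", "quick")),
        new SpanTermQuery(new Term("body", "fox"))
    };
    // slop=1, inOrder=true, collectPayloads=true; if Terms.hasPayloads() is
    // false for "body", the cheaper non-payload spans are used anyway.
    return new SpanNearQuery(clauses, 1, true, true);
  }
}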

View File

@@ -146,7 +146,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
startPos = in.nextStartPosition();
assert startPos != NO_MORE_POSITIONS;
for (;;) {
switch(acceptPosition(this)) {
switch(acceptPosition(in)) {
case YES:
atFirstInCurrentDoc = true;
return in.docID();
@@ -180,7 +180,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
if (startPos == NO_MORE_POSITIONS) {
return NO_MORE_POSITIONS;
}
switch(acceptPosition(this)) {
switch(acceptPosition(in)) {
case YES:
return startPos;
case NO:

View File

@@ -21,48 +21,58 @@ import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity;
/**
* Public for extension only.
*/
public class SpanScorer extends Scorer {
protected Spans spans;
protected int doc;
protected float freq;
protected int numMatches;
/** underlying spans we are scoring from */
protected final Spans spans;
/** similarity used in default score impl */
protected final Similarity.SimScorer docScorer;
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer)
throws IOException {
/** accumulated sloppy freq (computed in setFreqCurrentDoc) */
protected float freq;
/** number of matches (computed in setFreqCurrentDoc) */
protected int numMatches;
private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(weight);
this.docScorer = Objects.requireNonNull(docScorer);
this.spans = Objects.requireNonNull(spans);
this.doc = -1;
}
@Override
public int nextDoc() throws IOException {
int prevDoc = doc;
doc = spans.nextDoc();
if (doc != NO_MORE_DOCS) {
setFreqCurrentDoc();
}
return doc;
public final int nextDoc() throws IOException {
return spans.nextDoc();
}
@Override
public int advance(int target) throws IOException {
int prevDoc = doc;
doc = spans.advance(target);
if (doc != NO_MORE_DOCS) {
setFreqCurrentDoc();
}
return doc;
public final int advance(int target) throws IOException {
return spans.advance(target);
}
protected boolean setFreqCurrentDoc() throws IOException {
/**
* Ensure setFreqCurrentDoc is called, if not already called for the current doc.
*/
private final void ensureFreq() throws IOException {
int currentDoc = spans.docID();
if (lastScoredDoc != currentDoc) {
setFreqCurrentDoc();
lastScoredDoc = currentDoc;
}
}
/**
* Sets {@link #freq} and {@link #numMatches} for the current document.
* <p>
* This will be called at most once per document.
*/
protected void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;
@@ -90,34 +100,46 @@ public class SpanScorer extends Scorer {
assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
}
return true;
/**
* Score the current doc. The default implementation scores the doc
* with the similarity using the slop-adjusted {@link #freq}.
*/
protected float scoreCurrentDoc() throws IOException {
return docScorer.score(spans.docID(), freq);
}
@Override
public int docID() { return doc; }
public final int docID() { return spans.docID(); }
@Override
public float score() throws IOException {
float s = docScorer.score(doc, freq);
return s;
public final float score() throws IOException {
ensureFreq();
return scoreCurrentDoc();
}
@Override
public int freq() throws IOException {
public final int freq() throws IOException {
ensureFreq();
return numMatches;
}
/** Returns the intermediate "sloppy freq" adjusted for edit distance
* @lucene.internal */
// only public so .payloads can see it.
public float sloppyFreq() throws IOException {
public final float sloppyFreq() throws IOException {
ensureFreq();
return freq;
}
@Override
public long cost() {
public final long cost() {
return spans.cost();
}
@Override
public final TwoPhaseIterator asTwoPhaseIterator() {
return spans.asTwoPhaseIterator();
}
}
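The refactor above makes the frequency computation lazy and memoized: score(), freq(), and sloppyFreq() all go through ensureFreq(), so setFreqCurrentDoc() runs at most once per document no matter how many times the scorer is consulted. A stripped-down sketch of the same pattern, with illustrative names rather than Lucene's:

import java.util.function.IntToDoubleFunction;

// Caches an expensive per-document value, recomputing only when the doc
// changes, mirroring how SpanScorer.ensureFreq() guards setFreqCurrentDoc().
final class LazyPerDocValue {
  private final IntToDoubleFunction expensive; // hypothetical expensive computation
  private int lastComputedDoc = -1;            // sentinel: nothing cached yet
  private float cached;

  LazyPerDocValue(IntToDoubleFunction expensive) {
    this.expensive = expensive;
  }

  float valueFor(int doc) {
    if (lastComputedDoc != doc) {      // first access for this doc: compute
      cached = (float) expensive.applyAsDouble(doc);
      lastComputedDoc = doc;
    }
    return cached;                     // later accesses reuse the cached value
  }
}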

View File

@@ -199,8 +199,8 @@ public class TestIndexWriterForceMerge extends LuceneTestCase {
assertTrue("forceMerge used too much temporary space: starting usage was "
+ startDiskUsage + " bytes; final usage was " + finalDiskUsage
+ " bytes; max temp usage was " + maxDiskUsage
+ " but should have been " + (3 * maxStartFinalDiskUsage)
+ " (= 3X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 3 * maxStartFinalDiskUsage);
+ " but should have been at most " + (4 * maxStartFinalDiskUsage)
+ " (= 4X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 4 * maxStartFinalDiskUsage);
dir.close();
}

View File

@@ -162,7 +162,12 @@ final class JustCompileSearchSpans {
}
@Override
protected boolean setFreqCurrentDoc() {
protected void setFreqCurrentDoc() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
protected float scoreCurrentDoc() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}

View File

@@ -21,6 +21,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearchEquivalenceTestBase;
import org.apache.lucene.search.TermQuery;
@@ -106,4 +107,122 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false);
assertSubsetOf(q1, q2);
}
/** SpanNearQuery([A B], N, false) ⊆ SpanNearQuery([A B], N+1, false) */
public void testSpanNearIncreasingSloppiness() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
for (int i = 0; i < 10; i++) {
SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
assertSubsetOf(q1, q2);
}
}
/** SpanNearQuery([A B C], N, false) ⊆ SpanNearQuery([A B C], N+1, false) */
public void testSpanNearIncreasingSloppiness3() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
Term t3 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
for (int i = 0; i < 10; i++) {
SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
assertSubsetOf(q1, q2);
}
}
/** SpanNearQuery([A B], N, true) ⊆ SpanNearQuery([A B], N+1, true) */
public void testSpanNearIncreasingOrderedSloppiness() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
for (int i = 0; i < 10; i++) {
SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
assertSubsetOf(q1, q2);
}
}
/** SpanNearQuery([A B C], N, true) ⊆ SpanNearQuery([A B C], N+1, true) */
public void testSpanNearIncreasingOrderedSloppiness3() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
Term t3 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
for (int i = 0; i < 10; i++) {
SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
assertSubsetOf(q1, q2);
}
}
/** SpanFirstQuery(A, N) ⊆ TermQuery(A) */
public void testSpanFirstTerm() throws Exception {
Term t1 = randomTerm();
for (int i = 0; i < 10; i++) {
Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
Query q2 = new TermQuery(t1);
assertSubsetOf(q1, q2);
}
}
/** SpanFirstQuery(A, N) ⊆ SpanFirstQuery(A, N+1) */
public void testSpanFirstTermIncreasing() throws Exception {
Term t1 = randomTerm();
for (int i = 0; i < 10; i++) {
Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
Query q2 = new SpanFirstQuery(new SpanTermQuery(t1), i+1);
assertSubsetOf(q1, q2);
}
}
/** SpanFirstQuery(A, ∞) = TermQuery(A) */
public void testSpanFirstTermEverything() throws Exception {
Term t1 = randomTerm();
Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), Integer.MAX_VALUE);
Query q2 = new TermQuery(t1);
assertSameSet(q1, q2);
}
/** SpanFirstQuery([A B], N) ⊆ SpanNearQuery([A B]) */
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
public void testSpanFirstNear() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
for (int i = 0; i < 10; i++) {
Query q1 = new SpanFirstQuery(nearQuery, i);
Query q2 = nearQuery;
assertSubsetOf(q1, q2);
}
}
/** SpanFirstQuery([A B], N) ⊆ SpanFirstQuery([A B], N+1) */
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
public void testSpanFirstNearIncreasing() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
for (int i = 0; i < 10; i++) {
Query q1 = new SpanFirstQuery(nearQuery, i);
Query q2 = new SpanFirstQuery(nearQuery, i+1);
assertSubsetOf(q1, q2);
}
}
/** SpanFirstQuery([A B], ∞) = SpanNearQuery([A B]) */
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
public void testSpanFirstNearEverything() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
Query q1 = new SpanFirstQuery(nearQuery, Integer.MAX_VALUE);
Query q2 = nearQuery;
assertSameSet(q1, q2);
}
}

View File

@@ -306,6 +306,9 @@ public class WeightedSpanTermExtractor {
}
Bits acceptDocs = context.reader().getLiveDocs();
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
if (spans == null) {
return;
}
// collect span positions
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {

View File

@@ -0,0 +1,82 @@
package org.apache.lucene.search.highlight;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.LuceneTestCase;
public class MissesTest extends LuceneTestCase {
public void testTermQuery() throws IOException, InvalidTokenOffsetsException {
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
final Query query = new TermQuery(new Term("test", "foo"));
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
assertEquals("this is a <B>foo</B> bar example",
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
}
}
public void testBooleanQuery() throws IOException, InvalidTokenOffsetsException {
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
final BooleanQuery query = new BooleanQuery();
query.add(new TermQuery(new Term("test", "foo")), Occur.MUST);
query.add(new TermQuery(new Term("test", "bar")), Occur.MUST);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
assertEquals("this is a <B>foo</B> <B>bar</B> example",
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
}
}
public void testPhraseQuery() throws IOException, InvalidTokenOffsetsException {
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
final PhraseQuery query = new PhraseQuery();
query.add(new Term("test", "foo"));
query.add(new Term("test", "bar"));
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
assertEquals("this is a <B>foo</B> <B>bar</B> example",
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
}
}
public void testSpanNearQuery() throws IOException, InvalidTokenOffsetsException {
try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
final Query query = new SpanNearQuery(new SpanQuery[] {
new SpanTermQuery(new Term("test", "foo")),
new SpanTermQuery(new Term("test", "bar"))}, 0, true);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
assertEquals("this is a <B>foo</B> <B>bar</B> example",
highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
}
}
}

View File

@@ -116,9 +116,9 @@ org.apache.hadoop.version = 2.6.0
# The httpcore version is often different from the httpclient and httpmime versions,
# so the httpcore version value should not share the same symbolic name with them.
/org.apache.httpcomponents/httpclient = 4.3.1
/org.apache.httpcomponents/httpcore = 4.3
/org.apache.httpcomponents/httpmime = 4.3.1
/org.apache.httpcomponents/httpclient = 4.4.1
/org.apache.httpcomponents/httpcore = 4.4.1
/org.apache.httpcomponents/httpmime = 4.4.1
/org.apache.ivy/ivy = 2.3.0

View File

@@ -0,0 +1,97 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.LongBitSet;
import java.io.IOException;
abstract class BaseGlobalOrdinalScorer extends Scorer {
final LongBitSet foundOrds;
final SortedDocValues values;
final Scorer approximationScorer;
float score;
public BaseGlobalOrdinalScorer(Weight weight, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
super(weight);
this.foundOrds = foundOrds;
this.values = values;
this.approximationScorer = approximationScorer;
}
@Override
public float score() throws IOException {
return score;
}
@Override
public int docID() {
return approximationScorer.docID();
}
@Override
public int nextDoc() throws IOException {
return advance(approximationScorer.docID() + 1);
}
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
final DocIdSetIterator approximation = new DocIdSetIterator() {
@Override
public int docID() {
return approximationScorer.docID();
}
@Override
public int nextDoc() throws IOException {
return approximationScorer.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return approximationScorer.advance(target);
}
@Override
public long cost() {
return approximationScorer.cost();
}
};
return createTwoPhaseIterator(approximation);
}
@Override
public long cost() {
return approximationScorer.cost();
}
@Override
public int freq() throws IOException {
return 1;
}
protected abstract TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation);
}

View File

@@ -0,0 +1,114 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import java.io.IOException;
/**
* A collector that collects all ordinals from a specified field matching the query.
*
* @lucene.experimental
*/
final class GlobalOrdinalsCollector implements Collector {
final String field;
final LongBitSet collectedOrds;
final MultiDocValues.OrdinalMap ordinalMap;
GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
this.field = field;
this.ordinalMap = ordinalMap;
this.collectedOrds = new LongBitSet(valueCount);
}
public LongBitSet getCollectorOrdinals() {
return collectedOrds;
}
@Override
public boolean needsScores() {
return false;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
if (ordinalMap != null) {
LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
} else {
return new SegmentOrdinalCollector(docTermOrds);
}
}
final class OrdinalMapCollector implements LeafCollector {
private final SortedDocValues docTermOrds;
private final LongValues segmentOrdToGlobalOrdLookup;
OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
this.docTermOrds = docTermOrds;
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
}
@Override
public void collect(int doc) throws IOException {
final long segmentOrd = docTermOrds.getOrd(doc);
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
collectedOrds.set(globalOrd);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
}
}
final class SegmentOrdinalCollector implements LeafCollector {
private final SortedDocValues docTermOrds;
SegmentOrdinalCollector(SortedDocValues docTermOrds) {
this.docTermOrds = docTermOrds;
}
@Override
public void collect(int doc) throws IOException {
final long segmentOrd = docTermOrds.getOrd(doc);
if (segmentOrd != -1) {
collectedOrds.set(segmentOrd);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
}
}
}

View File

@@ -0,0 +1,245 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import java.io.IOException;
import java.util.Set;
final class GlobalOrdinalsQuery extends Query {
// All the ords of matching docs found with OrdinalsCollector.
private final LongBitSet foundOrds;
private final String joinField;
private final MultiDocValues.OrdinalMap globalOrds;
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
private final Query toQuery;
// just for hashcode and equals:
private final Query fromQuery;
private final IndexReader indexReader;
GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
this.foundOrds = foundOrds;
this.joinField = joinField;
this.globalOrds = globalOrds;
this.toQuery = toQuery;
this.fromQuery = fromQuery;
this.indexReader = indexReader;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new W(this, toQuery.createWeight(searcher, false));
}
@Override
public void extractTerms(Set<Term> terms) {
fromQuery.extractTerms(terms);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
GlobalOrdinalsQuery that = (GlobalOrdinalsQuery) o;
if (!fromQuery.equals(that.fromQuery)) return false;
if (!joinField.equals(that.joinField)) return false;
if (!toQuery.equals(that.toQuery)) return false;
if (!indexReader.equals(that.indexReader)) return false;
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + joinField.hashCode();
result = 31 * result + toQuery.hashCode();
result = 31 * result + fromQuery.hashCode();
result = 31 * result + indexReader.hashCode();
return result;
}
@Override
public String toString(String field) {
return "GlobalOrdinalsQuery{" +
"joinField=" + joinField +
'}';
}
final class W extends Weight {
private final Weight approximationWeight;
private float queryNorm;
private float queryWeight;
W(Query query, Weight approximationWeight) {
super(query);
this.approximationWeight = approximationWeight;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values != null) {
int segmentOrd = values.getOrd(doc);
if (segmentOrd != -1) {
BytesRef joinValue = values.lookupOrd(segmentOrd);
return new ComplexExplanation(true, queryNorm, "Score based on join value " + joinValue.utf8ToString());
}
}
return new ComplexExplanation(false, 0.0f, "Not a match");
}
@Override
public float getValueForNormalization() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
@Override
public void normalize(float norm, float topLevelBoost) {
this.queryNorm = norm * topLevelBoost;
queryWeight *= this.queryNorm;
}
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values == null) {
return null;
}
Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
if (approximationScorer == null) {
return null;
}
if (globalOrds != null) {
return new OrdinalMapScorer(this, queryNorm, foundOrds, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
} else {
return new SegmentOrdinalScorer(this, queryNorm, foundOrds, values, approximationScorer);
}
}
}
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
final LongValues segmentOrdToGlobalOrdLookup;
public OrdinalMapScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
super(weight, foundOrds, values, approximationScorer);
this.score = score;
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
}
@Override
public int advance(int target) throws IOException {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final long segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) {
return docID;
}
}
}
return NO_MORE_DOCS;
}
@Override
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final long segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) {
return true;
}
}
return false;
}
};
}
}
final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
public SegmentOrdinalScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
super(weight, foundOrds, values, approximationScorer);
this.score = score;
}
@Override
public int advance(int target) throws IOException {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final long segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) {
return docID;
}
}
}
return NO_MORE_DOCS;
}
@Override
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final long segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) {
return true;
}
}
return false;
}
};
}
}
}

View File

@@ -0,0 +1,250 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import java.io.IOException;
abstract class GlobalOrdinalsWithScoreCollector implements Collector {
final String field;
final MultiDocValues.OrdinalMap ordinalMap;
final LongBitSet collectedOrds;
protected final Scores scores;
GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
if (valueCount > Integer.MAX_VALUE) {
// We simply don't support more than Integer.MAX_VALUE ids.
throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
}
this.field = field;
this.ordinalMap = ordinalMap;
this.collectedOrds = new LongBitSet(valueCount);
this.scores = new Scores(valueCount);
}
public LongBitSet getCollectorOrdinals() {
return collectedOrds;
}
public float score(int globalOrdinal) {
return scores.getScore(globalOrdinal);
}
protected abstract void doScore(int globalOrd, float existingScore, float newScore);
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
if (ordinalMap != null) {
LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
} else {
return new SegmentOrdinalCollector(docTermOrds);
}
}
@Override
public boolean needsScores() {
return true;
}
final class OrdinalMapCollector implements LeafCollector {
private final SortedDocValues docTermOrds;
private final LongValues segmentOrdToGlobalOrdLookup;
private Scorer scorer;
OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
this.docTermOrds = docTermOrds;
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
}
@Override
public void collect(int doc) throws IOException {
final long segmentOrd = docTermOrds.getOrd(doc);
if (segmentOrd != -1) {
final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
collectedOrds.set(globalOrd);
float existingScore = scores.getScore(globalOrd);
float newScore = scorer.score();
doScore(globalOrd, existingScore, newScore);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
}
final class SegmentOrdinalCollector implements LeafCollector {
private final SortedDocValues docTermOrds;
private Scorer scorer;
SegmentOrdinalCollector(SortedDocValues docTermOrds) {
this.docTermOrds = docTermOrds;
}
@Override
public void collect(int doc) throws IOException {
final int segmentOrd = docTermOrds.getOrd(doc);
if (segmentOrd != -1) {
collectedOrds.set(segmentOrd);
float existingScore = scores.getScore(segmentOrd);
float newScore = scorer.score();
doScore(segmentOrd, existingScore, newScore);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
}
static final class Max extends GlobalOrdinalsWithScoreCollector {
public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
super(field, ordinalMap, valueCount);
}
@Override
protected void doScore(int globalOrd, float existingScore, float newScore) {
scores.setScore(globalOrd, Math.max(existingScore, newScore));
}
}
static final class Sum extends GlobalOrdinalsWithScoreCollector {
public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
super(field, ordinalMap, valueCount);
}
@Override
protected void doScore(int globalOrd, float existingScore, float newScore) {
scores.setScore(globalOrd, existingScore + newScore);
}
}
static final class Avg extends GlobalOrdinalsWithScoreCollector {
private final Occurrences occurrences;
public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
super(field, ordinalMap, valueCount);
this.occurrences = new Occurrences(valueCount);
}
@Override
protected void doScore(int globalOrd, float existingScore, float newScore) {
occurrences.increment(globalOrd);
scores.setScore(globalOrd, existingScore + newScore);
}
@Override
public float score(int globalOrdinal) {
return scores.getScore(globalOrdinal) / occurrences.getOccurence(globalOrdinal);
}
}
// Because the global ordinal is directly used as a key to a score we should be somewhat smart about allocating
// the scores array. Most of the time not all docs match, so splitting the scores array up in blocks can prevent creation of huge arrays.
// Also, working with smaller arrays is supposed to be more gc friendly.
//
// At first a hash map implementation would make sense, but in the case that more than half of the docs match, this becomes more expensive
// than just using an array.
// Maybe this should become a method parameter?
static final int arraySize = 4096;
static final class Scores {
final float[][] blocks;
private Scores(long valueCount) {
long blockSize = valueCount + arraySize - 1;
blocks = new float[(int) ((blockSize) / arraySize)][];
}
public void setScore(int globalOrdinal, float score) {
int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize;
float[] scores = blocks[block];
if (scores == null) {
blocks[block] = scores = new float[arraySize];
}
scores[offset] = score;
}
public float getScore(int globalOrdinal) {
int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize;
float[] scores = blocks[block];
float score;
if (scores != null) {
score = scores[offset];
} else {
score = 0f;
}
return score;
}
}
static final class Occurrences {
final int[][] blocks;
private Occurrences(long valueCount) {
long blockSize = valueCount + arraySize - 1;
blocks = new int[(int) (blockSize / arraySize)][];
}
public void increment(int globalOrdinal) {
int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize;
int[] occurrences = blocks[block];
if (occurrences == null) {
blocks[block] = occurrences = new int[arraySize];
}
occurrences[offset]++;
}
public int getOccurence(int globalOrdinal) {
int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize;
int[] occurrences = blocks[block];
return occurrences[offset];
}
}
}
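A quick worked check of the block scheme above, using the committed arraySize of 4096: global ordinal 10000 lands in block 10000 / 4096 = 2 at offset 10000 % 4096 = 1808, and a block array is only allocated the first time one of its ordinals receives a score. A tiny standalone restatement, with an illustrative class name:

// Same layout as Scores/Occurrences: the value for ordinal g lives at
// blocks[g / 4096][g % 4096]; untouched blocks stay null (and read as 0).
final class BlockedFloats {
  private static final int ARRAY_SIZE = 4096;
  private final float[][] blocks;

  BlockedFloats(long valueCount) {
    blocks = new float[(int) ((valueCount + ARRAY_SIZE - 1) / ARRAY_SIZE)][];
  }

  void set(int ord, float value) {
    int block = ord / ARRAY_SIZE;
    if (blocks[block] == null) {
      blocks[block] = new float[ARRAY_SIZE]; // allocate lazily on first touch
    }
    blocks[block][ord % ARRAY_SIZE] = value;
  }

  float get(int ord) {
    float[] values = blocks[ord / ARRAY_SIZE];
    return values == null ? 0f : values[ord % ARRAY_SIZE];
  }
}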

View File

@@ -0,0 +1,256 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import java.io.IOException;
import java.util.Set;
final class GlobalOrdinalsWithScoreQuery extends Query {
private final GlobalOrdinalsWithScoreCollector collector;
private final String joinField;
private final MultiDocValues.OrdinalMap globalOrds;
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
private final Query toQuery;
// just for hashcode and equals:
private final Query fromQuery;
private final IndexReader indexReader;
GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
this.collector = collector;
this.joinField = joinField;
this.globalOrds = globalOrds;
this.toQuery = toQuery;
this.fromQuery = fromQuery;
this.indexReader = indexReader;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new W(this, toQuery.createWeight(searcher, false));
}
@Override
public void extractTerms(Set<Term> terms) {
fromQuery.extractTerms(terms);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
GlobalOrdinalsWithScoreQuery that = (GlobalOrdinalsWithScoreQuery) o;
if (!fromQuery.equals(that.fromQuery)) return false;
if (!joinField.equals(that.joinField)) return false;
if (!toQuery.equals(that.toQuery)) return false;
if (!indexReader.equals(that.indexReader)) return false;
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + joinField.hashCode();
result = 31 * result + toQuery.hashCode();
result = 31 * result + fromQuery.hashCode();
result = 31 * result + indexReader.hashCode();
return result;
}
@Override
public String toString(String field) {
return "GlobalOrdinalsQuery{" +
"joinField=" + joinField +
'}';
}
final class W extends Weight {
private final Weight approximationWeight;
private float queryNorm;
private float queryWeight;
W(Query query, Weight approximationWeight) {
super(query);
this.approximationWeight = approximationWeight;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values != null) {
int segmentOrd = values.getOrd(doc);
if (segmentOrd != -1) {
final float score;
if (globalOrds != null) {
long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
score = collector.scores.getScore((int) globalOrd);
} else {
score = collector.score(segmentOrd);
}
BytesRef joinValue = values.lookupOrd(segmentOrd);
return new ComplexExplanation(true, score, "Score based on join value " + joinValue.utf8ToString());
}
}
return new ComplexExplanation(false, 0.0f, "Not a match");
}
@Override
public float getValueForNormalization() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
@Override
public void normalize(float norm, float topLevelBoost) {
this.queryNorm = norm * topLevelBoost;
queryWeight *= this.queryNorm;
}
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
if (values == null) {
return null;
}
Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
if (approximationScorer == null) {
return null;
} else if (globalOrds != null) {
return new OrdinalMapScorer(this, collector, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
} else {
return new SegmentOrdinalScorer(this, collector, values, approximationScorer);
}
}
}
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
final LongValues segmentOrdToGlobalOrdLookup;
final GlobalOrdinalsWithScoreCollector collector;
public OrdinalMapScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
this.collector = collector;
}
@Override
public int advance(int target) throws IOException {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final long segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) {
score = collector.score((int) globalOrd);
return docID;
}
}
}
return NO_MORE_DOCS;
}
@Override
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final long segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) {
score = collector.score((int) globalOrd);
return true;
}
}
return false;
}
};
}
}
final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
final GlobalOrdinalsWithScoreCollector collector;
public SegmentOrdinalScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer) {
super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
this.collector = collector;
}
@Override
public int advance(int target) throws IOException {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final int segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) {
score = collector.score(segmentOrd);
return docID;
}
}
}
return NO_MORE_DOCS;
}
@Override
protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final int segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) {
score = collector.score(segmentOrd);
return true;
}
}
return false;
}
};
}
}
}

View File

@@ -17,7 +17,12 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import java.io.IOException;
@@ -90,4 +95,78 @@ public final class JoinUtil {
}
}
/**
* A query time join using global ordinals over a dedicated join field.
*
* This join has certain restrictions and requirements:
* 1) A document can only refer to one other document (but may be referred to by one or more documents).
* 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
* that identifies the "from" and "to" side, and the fromQuery and toQuery must then take this into account.
* 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
* should store the join values as UTF-8 strings.
* 4) An ordinal map must be provided that is created on top of the join field.
*
* @param joinField The {@link org.apache.lucene.index.SortedDocValues} field containing the join values
* @param fromQuery The query containing the actual user query; it must match only "from" documents.
* @param toQuery The query identifying all documents on the "to" side.
* @param searcher The index searcher used to execute the from query
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
* @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
* needs to be provided.
* @return a {@link Query} instance that can be used to join documents based on the join field
* @throws IOException If I/O related errors occur
*/
public static Query createJoinQuery(String joinField,
Query fromQuery,
Query toQuery,
IndexSearcher searcher,
ScoreMode scoreMode,
MultiDocValues.OrdinalMap ordinalMap) throws IOException {
IndexReader indexReader = searcher.getIndexReader();
int numSegments = indexReader.leaves().size();
final long valueCount;
if (numSegments == 0) {
return new MatchNoDocsQuery();
} else if (numSegments == 1) {
// No need to use the ordinal map, because there is just one segment.
ordinalMap = null;
LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader();
SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField);
if (joinSortedDocValues != null) {
valueCount = joinSortedDocValues.getValueCount();
} else {
return new MatchNoDocsQuery();
}
} else {
if (ordinalMap == null) {
throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
}
valueCount = ordinalMap.getValueCount();
}
Query rewrittenFromQuery = searcher.rewrite(fromQuery);
if (scoreMode == ScoreMode.None) {
GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
searcher.search(fromQuery, globalOrdinalsCollector);
return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
}
GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector;
switch (scoreMode) {
case Total:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount);
break;
case Max:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount);
break;
case Avg:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount);
break;
default:
throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
}
searcher.search(fromQuery, globalOrdinalsWithScoreCollector);
return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
}
}
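To make the new entry point concrete, here is a minimal usage sketch (the reader, searcher and the "join_field"/"type" names are illustrative assumptions; the pattern mirrors the tests below):

// Build per-segment sorted doc values for the join field, then the global ordinal map.
SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
for (LeafReaderContext leaf : reader.leaves()) {
values[leaf.ord] = DocValues.getSorted(leaf.reader(), "join_field");
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(reader.getCoreCacheKey(), values, PackedInts.DEFAULT);
// Join: match "from" documents with the user query, return the joined "to" documents.
Query fromQuery = new TermQuery(new Term("type", "from"));
Query toQuery = new TermQuery(new Term("type", "to"));
Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, searcher, ScoreMode.None, ordinalMap);
TopDocs hits = searcher.search(joinQuery, 10);

The same ordinal map can be reused for any number of joins against the same reader.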

View File

@@ -17,19 +17,6 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
@@ -38,27 +25,29 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
@@ -74,8 +63,22 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception {
@@ -169,6 +172,180 @@ public class TestJoinUtil extends LuceneTestCase {
dir.close();
}
public void testSimpleOrdinalsJoin() throws Exception {
final String idField = "id";
final String productIdField = "productId";
// A field indicating what type a document belongs to, which is then used to distinguish between documents during joining.
final String typeField = "type";
// A single sorted doc values field that holds the join values for all document types.
// Typically during indexing a schema would automatically populate this field with the join values.
final String joinField = idField + productIdField;
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
// 0
Document doc = new Document();
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
IndexReader r = indexSearcher.getIndexReader();
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
r.getCoreCacheKey(), values, PackedInts.DEFAULT
);
Query toQuery = new TermQuery(new Term(typeField, "price"));
Query fromQuery = new TermQuery(new Term("name", "name2"));
// Search for product and return prices
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(4, result.scoreDocs[0].doc);
assertEquals(5, result.scoreDocs[1].doc);
fromQuery = new TermQuery(new Term("name", "name1"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(1, result.scoreDocs[0].doc);
assertEquals(2, result.scoreDocs[1].doc);
// Search for prices and return products
fromQuery = new TermQuery(new Term("price", "20.0"));
toQuery = new TermQuery(new Term(typeField, "product"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(0, result.scoreDocs[0].doc);
assertEquals(3, result.scoreDocs[1].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
public void testRandomOrdinalsJoin() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
);
IndexIterationContext context = createContext(100, w, false, true);
w.forceMerge(1);
w.close();
IndexReader topLevelReader = DirectoryReader.open(dir);
SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
for (LeafReaderContext leafContext : topLevelReader.leaves()) {
values[leafContext.ord] = DocValues.getSorted(leafContext.reader(), "join_field");
}
context.ordinalMap = MultiDocValues.OrdinalMap.build(
topLevelReader.getCoreCacheKey(), values, PackedInts.DEFAULT
);
IndexSearcher indexSearcher = newSearcher(topLevelReader);
int r = random().nextInt(context.randomUniqueValues.length);
boolean from = context.randomFrom[r];
String randomValue = context.randomUniqueValues[r];
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
final Query actualQuery = new TermQuery(new Term("value", randomValue));
if (VERBOSE) {
System.out.println("actualQuery=" + actualQuery);
}
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
if (VERBOSE) {
System.out.println("scoreMode=" + scoreMode);
}
final Query joinQuery;
if (from) {
BooleanQuery fromQuery = new BooleanQuery();
fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "to"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
} else {
BooleanQuery fromQuery = new BooleanQuery();
fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "from"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
}
if (VERBOSE) {
System.out.println("joinQuery=" + joinQuery);
}
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
assertBitSet(expectedResult, actualResult, indexSearcher);
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
topLevelReader.close();
dir.close();
}
// TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
public void testOverflowTermsWithScoreCollector() throws Exception {
test300spartans(true, ScoreMode.Avg);
@@ -448,7 +625,7 @@ public class TestJoinUtil extends LuceneTestCase {
dir,
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
);
IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument);
IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, false);
IndexReader topLevelReader = w.getReader();
w.close();
@@ -485,28 +662,20 @@ public class TestJoinUtil extends LuceneTestCase {
// Need to know all documents that have matches. TopDocs doesn't give me that, and then I'd also be testing TopDocsCollector...
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
indexSearcher.search(joinQuery, new Collector() {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final int docBase = context.docBase;
final LeafCollector in = topScoreDocCollector.getLeafCollector(context);
return new FilterLeafCollector(in) {
@Override
public void collect(int doc) throws IOException {
super.collect(doc);
actualResult.set(doc + docBase);
}
};
}
@Override
public boolean needsScores() {
return topScoreDocCollector.needsScores();
}
});
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
// Asserting bit set...
assertBitSet(expectedResult, actualResult, indexSearcher);
// Asserting TopDocs...
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
}
topLevelReader.close();
dir.close();
}
}
private void assertBitSet(BitSet expectedResult, BitSet actualResult, IndexSearcher indexSearcher) throws IOException {
if (VERBOSE) {
System.out.println("expected cardinality:" + expectedResult.cardinality());
DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());
@@ -520,14 +689,13 @@ public class TestJoinUtil extends LuceneTestCase {
}
}
assertEquals(expectedResult, actualResult);
}
// Asserting TopDocs...
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
private void assertTopDocs(TopDocs expectedTopDocs, TopDocs actualTopDocs, ScoreMode scoreMode, IndexSearcher indexSearcher, Query joinQuery) throws IOException {
assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
if (scoreMode == ScoreMode.None) {
continue;
return;
}
assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
@@ -542,16 +710,16 @@ public class TestJoinUtil extends LuceneTestCase {
assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
}
}
topLevelReader.close();
dir.close();
}
private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument, boolean ordinalJoin) throws IOException {
return createContext(nDocs, writer, writer, multipleValuesPerDocument, ordinalJoin);
}
private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument) throws IOException {
return createContext(nDocs, writer, writer, multipleValuesPerDocument);
private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
if (globalOrdinalJoin) {
assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
}
private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument) throws IOException {
IndexIterationContext context = new IndexIterationContext();
int numRandomValues = nDocs / 2;
context.randomUniqueValues = new String[numRandomValues];
@@ -560,8 +728,8 @@ public class TestJoinUtil extends LuceneTestCase {
for (int i = 0; i < numRandomValues; i++) {
String uniqueRandomValue;
do {
uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
// uniqueRandomValue = _TestUtil.randomSimpleString(random);
// uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
uniqueRandomValue = TestUtil.randomSimpleString(random());
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
// Generated values must be unique; empty strings aren't allowed.
trackSet.add(uniqueRandomValue);
@@ -581,15 +749,18 @@ public class TestJoinUtil extends LuceneTestCase {
boolean from = context.randomFrom[randomI];
int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
if (globalOrdinalJoin) {
document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
}
for (int j = 0; j < numberOfLinkValues; j++) {
String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
docs[i].linkValues.add(linkValue);
if (from) {
if (!context.fromDocuments.containsKey(linkValue)) {
context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
context.fromDocuments.put(linkValue, new ArrayList<>());
}
if (!context.randomValueFromDocs.containsKey(value)) {
context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
context.randomValueFromDocs.put(value, new ArrayList<>());
}
context.fromDocuments.get(linkValue).add(docs[i]);
@@ -600,12 +771,15 @@ public class TestJoinUtil extends LuceneTestCase {
} else {
document.add(new SortedDocValuesField("from", new BytesRef(linkValue)));
}
if (globalOrdinalJoin) {
document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
}
} else {
if (!context.toDocuments.containsKey(linkValue)) {
context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
context.toDocuments.put(linkValue, new ArrayList<>());
}
if (!context.randomValueToDocs.containsKey(value)) {
context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
context.randomValueToDocs.put(value, new ArrayList<>());
}
context.toDocuments.get(linkValue).add(docs[i]);
@@ -616,6 +790,9 @@ public class TestJoinUtil extends LuceneTestCase {
} else {
document.add(new SortedDocValuesField("to", new BytesRef(linkValue)));
}
if (globalOrdinalJoin) {
document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
}
}
}
@@ -707,6 +884,9 @@ public class TestJoinUtil extends LuceneTestCase {
if (joinScore == null) {
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
}
if (VERBOSE) {
System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
}
joinScore.addScore(scorer.score());
}
@@ -875,6 +1055,7 @@ public class TestJoinUtil extends LuceneTestCase {
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();
MultiDocValues.OrdinalMap ordinalMap;
}
private static class RandomDoc {
@@ -922,4 +1103,29 @@ public class TestJoinUtil extends LuceneTestCase {
}
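/** Collects every matching doc id into the supplied bit set, offset by the segment's docBase. */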
private static class BitSetCollector extends SimpleCollector {
private final BitSet bitSet;
private int docBase;
private BitSetCollector(BitSet bitSet) {
this.bitSet = bitSet;
}
@Override
public void collect(int doc) throws IOException {
bitSet.set(docBase + doc);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public boolean needsScores() {
return false;
}
}
}

View File

@@ -1 +0,0 @@
0ec13f6423eb6d5858e229939a2bc118473ef94c

View File

@@ -0,0 +1 @@
016d0bc512222f1253ee6b64d389c84e22f697f0

View File

@@ -1 +0,0 @@
11393498b38e9695d0850cac26fde5613ae268b9

View File

@@ -0,0 +1 @@
f5aa318bda4c6c8d688c9d00b90681dcd82ce636

View File

@@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
@@ -57,11 +56,9 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import org.junit.After;
import org.junit.Before;
@@ -158,10 +155,11 @@ public class SuggestFieldTest extends LuceneTestCase {
weights[i] = Math.abs(random().nextLong());
document.add(newSuggestField("suggest_field", "abc", weights[i]));
iw.addDocument(document);
}
if (rarely()) {
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
Entry[] expectedEntries = new Entry[num];
@@ -200,11 +198,15 @@ public class SuggestFieldTest extends LuceneTestCase {
}
iw.addDocument(document);
document.clear();
if (usually()) {
iw.commit();
}
}
iw.deleteDocuments(new Term("str_field", "delete"));
DirectoryReader reader = DirectoryReader.open(iw, false);
DirectoryReader reader = DirectoryReader.open(iw, true);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", numLive);
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
@@ -224,6 +226,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newStringField("str_fld", "deleted", Field.Store.NO));
iw.addDocument(document);
document.clear();
if (usually()) {
iw.commit();
}
}
Filter filter = new QueryWrapperFilter(new TermsQuery("str_fld", new BytesRef("non_existent")));
@@ -249,11 +255,15 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newStringField("delete", "delete", Field.Store.NO));
iw.addDocument(document);
document.clear();
if (usually()) {
iw.commit();
}
}
iw.deleteDocuments(new Term("delete", "delete"));
DirectoryReader reader = DirectoryReader.open(iw, false);
DirectoryReader reader = DirectoryReader.open(iw, true);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", num);
assertThat(suggest.totalHits, equalTo(0));
@@ -274,6 +284,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(new IntField("weight_fld", i, Field.Store.YES));
iw.addDocument(document);
document.clear();
if (usually()) {
iw.commit();
}
}
iw.deleteDocuments(NumericRangeQuery.newIntRange("weight_fld", 2, null, true, false));
@@ -298,6 +312,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(new IntField("filter_int_fld", i, Field.Store.NO));
iw.addDocument(document);
document.clear();
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
@@ -542,6 +560,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newSuggestField("suggest_field", suggest, weight));
mappings.put(suggest, weight);
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();

View File

@@ -263,6 +263,9 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase {
* Both queries will be filtered by <code>filter</code>
*/
protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception {
QueryUtils.check(q1);
QueryUtils.check(q2);
if (filter != null) {
q1 = new FilteredQuery(q1, filter);
q2 = new FilteredQuery(q2, filter);

View File

@@ -78,6 +78,9 @@ Detailed Change List
New Features
----------------------
* SOLR-6637: Solr should have a way to restore a core from a backed up index.
(Varun Thacker, noble, shalin)
Bug Fixes
----------------------
@@ -90,6 +93,11 @@ Optimizations
* SOLR-7324: IndexFetcher does not need to call isIndexStale if full copy is already needed
(Stephan Lagraulet via Varun Thacker)
Other Changes
----------------------
* SOLR-6865: Upgrade HttpClient to 4.4.1 (Shawn Heisey)
================== 5.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@@ -29,6 +29,7 @@ import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
@@ -246,31 +247,31 @@ public class IndexFetcher {
}
}
boolean fetchLatestIndex(final SolrCore core, boolean forceReplication) throws IOException, InterruptedException {
return fetchLatestIndex(core, forceReplication, false);
boolean fetchLatestIndex(boolean forceReplication) throws IOException, InterruptedException {
return fetchLatestIndex(forceReplication, false);
}
/**
* This command downloads all the necessary files from master to install an index commit point. Only changed files are
* downloaded. It also downloads the conf files (if they are modified).
*
* @param core the SolrCore
* @param forceReplication force a replication in all cases
* @param forceCoreReload force a core reload in all cases
* @return true on success, false if slave is already in sync
* @throws IOException if an exception occurs
*/
boolean fetchLatestIndex(final SolrCore core, boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {
boolean fetchLatestIndex(boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {
boolean cleanupDone = false;
boolean successfulInstall = false;
replicationStartTime = System.currentTimeMillis();
Directory tmpIndexDir = null;
String tmpIndex = null;
String tmpIndex;
Directory indexDir = null;
String indexDirPath = null;
String indexDirPath;
boolean deleteTmpIdxDir = true;
if (!core.getSolrCoreState().getLastReplicateIndexSuccess()) {
if (!solrCore.getSolrCoreState().getLastReplicateIndexSuccess()) {
// if the last replication was not a success, we force a full replication
// when we are a bit more confident we may want to try a partial replication
// if the error is connection related or something, but we have to be careful
@@ -279,7 +280,7 @@ public class IndexFetcher {
try {
//get the current 'replicateable' index version in the master
NamedList response = null;
NamedList response;
try {
response = getLatestVersion();
} catch (Exception e) {
@@ -290,12 +291,12 @@ public class IndexFetcher {
long latestGeneration = (Long) response.get(GENERATION);
// TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit();
if (commit == null) {
// Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points
RefCounted<SolrIndexSearcher> searcherRefCounted = null;
try {
searcherRefCounted = core.getNewestSearcher(false);
searcherRefCounted = solrCore.getNewestSearcher(false);
if (searcherRefCounted == null) {
LOG.warn("No open searcher found - fetch aborted");
return false;
@@ -312,15 +313,14 @@ public class IndexFetcher {
if (forceReplication && commit.getGeneration() != 0) {
// since we won't get the files for an empty index,
// we just clear ours and commit
RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core);
RefCounted<IndexWriter> iw = solrCore.getUpdateHandler().getSolrCoreState().getIndexWriter(solrCore);
try {
iw.get().deleteAll();
} finally {
iw.decref();
}
SolrQueryRequest req = new LocalSolrQueryRequest(core,
new ModifiableSolrParams());
core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams());
solrCore.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
}
//there is nothing to be replicated
@@ -340,7 +340,9 @@ public class IndexFetcher {
// get the list of files first
fetchFileList(latestGeneration);
// this can happen if the commit point is deleted before we fetch the file list.
if(filesToDownload.isEmpty()) return false;
if (filesToDownload.isEmpty()) {
return false;
}
LOG.info("Number of files in latest index in master: " + filesToDownload.size());
// Create the sync service
@@ -354,13 +356,13 @@ public class IndexFetcher {
|| commit.getGeneration() >= latestGeneration || forceReplication;
String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
tmpIndex = createTempindexDir(core, tmpIdxDirName);
tmpIndex = Paths.get(solrCore.getDataDir(), tmpIdxDirName).toString();
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
// current index dir...
indexDirPath = core.getIndexDir();
indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
indexDirPath = solrCore.getIndexDir();
indexDir = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
try {
@@ -404,7 +406,7 @@ public class IndexFetcher {
} finally {
writer.decref();
}
solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true);
solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(solrCore, true);
}
boolean reloadCore = false;
@@ -422,7 +424,7 @@ public class IndexFetcher {
reloadCore = true;
downloadConfFiles(confFilesToDownload, latestGeneration);
if (isFullCopyNeeded) {
successfulInstall = modifyIndexProps(tmpIdxDirName);
successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
deleteTmpIdxDir = false;
} else {
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
@@ -433,8 +435,8 @@ public class IndexFetcher {
// may be closed
if (indexDir != null) {
LOG.info("removing old index directory " + indexDir);
core.getDirectoryFactory().doneWithDirectory(indexDir);
core.getDirectoryFactory().remove(indexDir);
solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
solrCore.getDirectoryFactory().remove(indexDir);
}
}
@@ -446,7 +448,7 @@ public class IndexFetcher {
} else {
terminateAndWaitFsyncService();
if (isFullCopyNeeded) {
successfulInstall = modifyIndexProps(tmpIdxDirName);
successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
deleteTmpIdxDir = false;
} else {
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
@@ -458,13 +460,13 @@ public class IndexFetcher {
}
} finally {
if (!isFullCopyNeeded) {
solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core);
solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(solrCore);
}
}
// we must reload the core after we open the IW back up
if (successfulInstall && (reloadCore || forceCoreReload)) {
LOG.info("Reloading SolrCore {}", core.getName());
LOG.info("Reloading SolrCore {}", solrCore.getName());
reloadCore();
}
@@ -474,8 +476,8 @@ public class IndexFetcher {
// may be closed
if (indexDir != null) {
LOG.info("removing old index directory " + indexDir);
core.getDirectoryFactory().doneWithDirectory(indexDir);
core.getDirectoryFactory().remove(indexDir);
solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
solrCore.getDirectoryFactory().remove(indexDir);
}
}
if (isFullCopyNeeded) {
@@ -486,13 +488,13 @@ public class IndexFetcher {
}
if (!isFullCopyNeeded && !forceReplication && !successfulInstall) {
cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
cleanupDone = true;
// we try with a full copy of the index
LOG.warn(
"Replication attempt was not successful - trying a full index replication reloadCore={}",
reloadCore);
successfulInstall = fetchLatestIndex(core, true, reloadCore);
successfulInstall = fetchLatestIndex(true, reloadCore);
}
replicationStartTime = 0;
@@ -505,11 +507,11 @@ public class IndexFetcher {
} catch (InterruptedException e) {
throw new InterruptedException("Index fetch interrupted");
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
}
} finally {
if (!cleanupDone) {
cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
}
}
}
@@ -719,15 +721,6 @@ public class IndexFetcher {
}
/**
* All the files are copied to a temp dir first
*/
private String createTempindexDir(SolrCore core, String tmpIdxDirName) {
// TODO: there should probably be a DirectoryFactory#concatPath(parent, name)
// or something
return core.getDataDir() + tmpIdxDirName;
}
private void reloadCore() {
final CountDownLatch latch = new CountDownLatch(1);
new Thread() {
@@ -815,12 +808,12 @@ public class IndexFetcher {
|| filename.startsWith("segments_") || size < _100K);
}
static class CompareResult {
protected static class CompareResult {
boolean equal = false;
boolean checkSummed = false;
}
private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
protected static CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
CompareResult compareResult = new CompareResult();
try {
try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
@@ -887,8 +880,8 @@ public class IndexFetcher {
}
/**
* All the files which are common between master and slave must have same size else we assume they are
* not compatible (stale).
* All the files which are common between master and slave must have same size and same checksum else we assume
* they are not compatible (stale).
*
* @return true if the index stale and we need to download a fresh copy, false otherwise.
* @throws IOException if low level io error
@@ -1034,7 +1027,7 @@ public class IndexFetcher {
/**
* If the index is stale by any chance, load the index from a different dir in the data dir (by updating index.properties).
*/
private boolean modifyIndexProps(String tmpIdxDirName) {
protected static boolean modifyIndexProps(SolrCore solrCore, String tmpIdxDirName) {
LOG.info("New index installed. Updating index properties... index="+tmpIdxDirName);
Properties p = new Properties();
Directory dir = null;

View File

@@ -0,0 +1,50 @@
package org.apache.solr.handler;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
class OldBackupDirectory implements Comparable<OldBackupDirectory> {
File dir;
Date timestamp;
private final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
OldBackupDirectory(File dir) {
if (dir.isDirectory()) {
Matcher m = dirNamePattern.matcher(dir.getName());
if (m.find()) {
try {
this.dir = dir;
this.timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).parse(m.group(1));
} catch (Exception e) {
this.dir = null;
this.timestamp = null;
}
}
}
}
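// Sorts newest first: a later timestamp compares lower, so index 0 is the most recent snapshot.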
@Override
public int compareTo(OldBackupDirectory that) {
return that.timestamp.compareTo(this.timestamp);
}
}

View File

@@ -36,7 +36,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -146,6 +148,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
private ReentrantLock indexFetchLock = new ReentrantLock();
private ExecutorService restoreExecutor = Executors.newSingleThreadExecutor(
new DefaultSolrThreadFactory("restoreExecutor"));
private volatile Future<Boolean> restoreFuture;
private volatile String currentRestoreName;
private String includeConfFiles;
private NamedList<String> confFileNameAlias = new NamedList<>();
@@ -235,6 +244,11 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} else if (command.equalsIgnoreCase(CMD_BACKUP)) {
doSnapShoot(new ModifiableSolrParams(solrParams), rsp, req);
rsp.add(STATUS, OK_STATUS);
} else if (command.equalsIgnoreCase(CMD_RESTORE)) {
restore(new ModifiableSolrParams(solrParams), rsp, req);
rsp.add(STATUS, OK_STATUS);
} else if (command.equalsIgnoreCase(CMD_RESTORE_STATUS)) {
rsp.add(CMD_RESTORE_STATUS, getRestoreStatus());
} else if (command.equalsIgnoreCase(CMD_DELETE_BACKUP)) {
deleteSnapshot(new ModifiableSolrParams(solrParams));
rsp.add(STATUS, OK_STATUS);
@@ -302,7 +316,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
}
SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME));
SnapShooter snapShooter = new SnapShooter(core, params.get(LOCATION), params.get(NAME));
snapShooter.validateDeleteSnapshot();
snapShooter.deleteSnapAsync(this);
}
@@ -361,7 +375,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} else {
currentIndexFetcher = pollingIndexFetcher;
}
return currentIndexFetcher.fetchLatestIndex(core, forceReplication);
return currentIndexFetcher.fetchLatestIndex(forceReplication);
} catch (Exception e) {
SolrException.log(LOG, "Index fetch failed ", e);
} finally {
@@ -377,6 +391,72 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
return indexFetchLock.isLocked();
}
private void restore(SolrParams params, SolrQueryResponse rsp, SolrQueryRequest req) {
if (restoreFuture != null && !restoreFuture.isDone()) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Restore in progress. Cannot run multiple restore operations" +
"for the same core");
}
String name = params.get(NAME);
String location = params.get(LOCATION);
//If location is not provided then assume that the restore index is present inside the data directory.
if (location == null) {
location = core.getDataDir();
}
//If name is not provided then look for the latest unnamed snapshot folder (the ones with the snapshot.<timestamp>
//format), since we allow snapshots to be taken without providing a name. Pick the latest timestamp.
if (name == null) {
File[] files = new File(location).listFiles();
List<OldBackupDirectory> dirs = new ArrayList<>();
for (File f : files) {
OldBackupDirectory obd = new OldBackupDirectory(f);
if (obd.dir != null) {
dirs.add(obd);
}
}
Collections.sort(dirs);
if (dirs.size() == 0) {
throw new SolrException(ErrorCode.BAD_REQUEST, "No backup name specified and none found in " + core.getDataDir());
}
name = dirs.get(0).dir.getName();
} else {
//"snapshot." is prefixed by snapshooter
name = "snapshot." + name;
}
RestoreCore restoreCore = new RestoreCore(core, location, name);
restoreFuture = restoreExecutor.submit(restoreCore);
currentRestoreName = name;
}
private NamedList<Object> getRestoreStatus() {
NamedList<Object> status = new SimpleOrderedMap<>();
if (restoreFuture == null) {
status.add(STATUS, "No restore actions in progress");
return status;
}
status.add("snapshotName", currentRestoreName);
if (restoreFuture.isDone()) {
try {
boolean success = restoreFuture.get();
if (success) {
status.add(STATUS, SUCCESS);
} else {
status.add(STATUS, FAILED);
}
} catch (Exception e) {
status.add(STATUS, FAILED);
status.add(EXCEPTION, e.getMessage());
}
} else {
status.add(STATUS, "In Progress");
}
return status;
}
private void doSnapShoot(SolrParams params, SolrQueryResponse rsp,
SolrQueryRequest req) {
try {
@@ -487,7 +567,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
result.add(fileMeta);
} catch (IOException e) {
rsp.add("status", "unable to get file names for given index generation");
rsp.add("exception", e);
rsp.add(EXCEPTION, e);
LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
} finally {
if (dir != null) {
@@ -1106,6 +1186,19 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
@Override
public void postClose(SolrCore core) {}
});
core.addCloseHook(new CloseHook() {
@Override
public void preClose(SolrCore core) {
ExecutorUtil.shutdownNowAndAwaitTermination(restoreExecutor);
if (restoreFuture != null) {
restoreFuture.cancel(true);
}
}
@Override
public void postClose(SolrCore core) {}
});
}
/**
@@ -1407,6 +1500,14 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
return result;
}
private static final String LOCATION = "location";
private static final String SUCCESS = "success";
private static final String FAILED = "failed";
private static final String EXCEPTION = "exception";
public static final String MASTER_URL = "masterUrl";
public static final String STATUS = "status";
@@ -1417,6 +1518,10 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
public static final String CMD_BACKUP = "backup";
public static final String CMD_RESTORE = "restore";
public static final String CMD_RESTORE_STATUS = "restorestatus";
public static final String CMD_FETCH_INDEX = "fetchindex";
public static final String CMD_ABORT_FETCH = "abortfetch";
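For reference, a minimal sketch of driving the new commands over HTTP (host, port, core and backup name are placeholder assumptions; the name and location parameters are optional, as the handler code above shows):

http://localhost:8983/solr/collection1/replication?command=restore&name=mybackup&location=/path/to/backups
http://localhost:8983/solr/collection1/replication?command=restorestatus

Since the restore runs asynchronously on the single-threaded restore executor, restorestatus is polled until it reports "success" or "failed".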

View File

@@ -0,0 +1,149 @@
package org.apache.solr.handler;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class RestoreCore implements Callable<Boolean> {
private static final Logger log = LoggerFactory.getLogger(RestoreCore.class.getName());
private final String backupName;
private final String backupLocation;
private final SolrCore core;
public RestoreCore(SolrCore core, String location, String name) {
this.core = core;
this.backupLocation = location;
this.backupName = name;
}
@Override
public Boolean call() throws Exception {
return doRestore();
}
private boolean doRestore() throws Exception {
Path backupPath = Paths.get(backupLocation, backupName);
String restoreIndexName = "restore." + backupName;
Path restoreIndexPath = Paths.get(core.getDataDir(), restoreIndexName);
Directory restoreIndexDir = null;
Directory indexDir = null;
try (Directory backupDir = FSDirectory.open(backupPath)) {
restoreIndexDir = core.getDirectoryFactory().get(restoreIndexPath.toString(),
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Prefer local copy.
indexDir = core.getDirectoryFactory().get(core.getIndexDir(),
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Copy all files from backupDir to restoreIndexDir
for (String filename : backupDir.listAll()) {
checkInterrupted();
log.info("Copying over file to restore directory " + filename);
try (IndexInput indexInput = backupDir.openInput(filename, IOContext.READONCE)) {
long checksum = CodecUtil.retrieveChecksum(indexInput);
long length = indexInput.length();
IndexFetcher.CompareResult compareResult = IndexFetcher.compareFile(indexDir, filename, length, checksum);
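// Copy from the backup unless an identical local file was verified by checksum;
// segments_N, .si and .liv files must also come from the backup when they could not be checksummed.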
if (!compareResult.equal || (!compareResult.checkSummed && (filename.endsWith(".si")
|| filename.endsWith(".liv") || filename.startsWith("segments_")))) {
restoreIndexDir.copyFrom(backupDir, filename, filename, IOContext.READONCE);
} else {
//prefer local copy
restoreIndexDir.copyFrom(indexDir, filename, filename, IOContext.READONCE);
}
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
}
}
log.debug("Switching directories");
IndexFetcher.modifyIndexProps(core, restoreIndexName);
boolean success;
try {
core.getUpdateHandler().newIndexWriter(false);
openNewSearcher();
success = true;
log.info("Successfully restored to the backup index");
} catch (Exception e) {
//Rollback to the old index directory. Delete the restore index directory and mark the restore as failed.
log.info("Could not switch to restored index. Rolling back to the current index");
Directory dir = null;
try {
dir = core.getDirectoryFactory().get(core.getDataDir(), DirectoryFactory.DirContext.META_DATA,
core.getSolrConfig().indexConfig.lockType);
dir.deleteFile(IndexFetcher.INDEX_PROPERTIES);
} finally {
if (dir != null) {
core.getDirectoryFactory().release(dir);
}
}
core.getDirectoryFactory().doneWithDirectory(restoreIndexDir);
core.getDirectoryFactory().remove(restoreIndexDir);
core.getUpdateHandler().newIndexWriter(false);
openNewSearcher();
throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
}
if (success) {
core.getDirectoryFactory().doneWithDirectory(indexDir);
core.getDirectoryFactory().remove(indexDir);
}
return true;
} finally {
if (restoreIndexDir != null) {
core.getDirectoryFactory().release(restoreIndexDir);
}
if (indexDir != null) {
core.getDirectoryFactory().release(indexDir);
}
}
}
private void checkInterrupted() throws InterruptedException {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("Stopping restore process. Thread was interrupted.");
}
}
private void openNewSearcher() throws Exception {
Future[] waitSearcher = new Future[1];
core.getSearcher(true, false, waitSearcher, true);
if (waitSearcher[0] != null) {
waitSearcher[0].get();
}
}
}

View File

@@ -18,6 +18,7 @@ package org.apache.solr.handler;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
@@ -58,10 +59,11 @@ public class SnapShooter {
public SnapShooter(SolrCore core, String location, String snapshotName) {
solrCore = core;
if (location == null) snapDir = core.getDataDir();
if (location == null) {
snapDir = core.getDataDir();
}
else {
File base = new File(core.getCoreDescriptor().getInstanceDir());
snapDir = org.apache.solr.util.FileUtils.resolvePath(base, location).getAbsolutePath();
snapDir = Paths.get(core.getCoreDescriptor().getInstanceDir()).resolve(location).toAbsolutePath().toString();
}
this.snapshotName = snapshotName;
@@ -125,7 +127,7 @@ public class SnapShooter {
}
void createSnapshot(final IndexCommit indexCommit, ReplicationHandler replicationHandler) {
LOG.info("Creating backup snapshot...");
LOG.info("Creating backup snapshot " + (snapshotName == null ? "<not named>" : snapshotName));
NamedList<Object> details = new NamedList<>();
details.add("startTime", new Date().toString());
try {
@@ -193,31 +195,6 @@ public class SnapShooter {
replicationHandler.snapShootDetails = details;
}
private class OldBackupDirectory implements Comparable<OldBackupDirectory>{
File dir;
Date timestamp;
final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
OldBackupDirectory(File dir) {
if(dir.isDirectory()) {
Matcher m = dirNamePattern.matcher(dir.getName());
if(m.find()) {
try {
this.dir = dir;
this.timestamp = new SimpleDateFormat(DATE_FMT, Locale.ROOT).parse(m.group(1));
} catch(Exception e) {
this.dir = null;
this.timestamp = null;
}
}
}
}
@Override
public int compareTo(OldBackupDirectory that) {
return that.timestamp.compareTo(this.timestamp);
}
}
public static final String DATE_FMT = "yyyyMMddHHmmssSSS";

View File

@@ -121,7 +121,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
@Test
public void testBackupOnCommit() throws Exception {
//Index
int nDocs = indexDocs();
int nDocs = indexDocs(masterClient);
//Confirm if completed
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient);
@@ -146,7 +146,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
}
}
private int indexDocs() throws IOException, SolrServerException {
protected static int indexDocs(SolrClient masterClient) throws IOException, SolrServerException {
int nDocs = TestUtil.nextInt(random(), 1, 100);
masterClient.deleteByQuery("*:*");
for (int i = 0; i < nDocs; i++) {
@@ -164,7 +164,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
@Test
public void doTestBackup() throws Exception {
int nDocs = indexDocs();
int nDocs = indexDocs(masterClient);
Path[] snapDir = new Path[5]; //One extra for the backup on commit
//First snapshot location
@@ -180,17 +180,16 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
backupNames = new String[4];
}
for (int i = 0; i < 4; i++) {
BackupCommand backupCommand;
final String backupName = TestUtil.randomSimpleString(random(), 1, 20);
if (!namedBackup) {
backupCommand = new BackupCommand(addNumberToKeepInRequest, backupKeepParamName, ReplicationHandler.CMD_BACKUP);
if (addNumberToKeepInRequest) {
runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&" + backupKeepParamName + "=2");
} else {
backupCommand = new BackupCommand(backupName, ReplicationHandler.CMD_BACKUP);
backupNames[i] = backupName;
runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "");
}
backupCommand.runCommand();
if (backupCommand.fail != null) {
fail(backupCommand.fail);
} else {
runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&name=" + backupName);
backupNames[i] = backupName;
}
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, firstBackupTimestamp);
@@ -253,8 +252,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
private void testDeleteNamedBackup(String backupNames[]) throws InterruptedException, IOException {
String lastTimestamp = null;
for (int i = 0; i < 2; i++) {
BackupCommand deleteBackupCommand = new BackupCommand(backupNames[i], ReplicationHandler.CMD_DELETE_BACKUP);
deleteBackupCommand.runCommand();
runBackupCommand(masterJetty, ReplicationHandler.CMD_DELETE_BACKUP, "&name=" +backupNames[i]);
CheckDeleteBackupStatus checkDeleteBackupStatus = new CheckDeleteBackupStatus(backupNames[i], lastTimestamp);
while (true) {
boolean success = checkDeleteBackupStatus.fetchStatus();
@@ -267,53 +265,20 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
}
Thread.sleep(200);
}
if (deleteBackupCommand.fail != null) {
fail(deleteBackupCommand.fail);
}
}
}
private class BackupCommand {
String fail = null;
final boolean addNumberToKeepInRequest;
String backupKeepParamName;
String backupName;
String cmd;
BackupCommand(boolean addNumberToKeepInRequest, String backupKeepParamName, String command) {
this.addNumberToKeepInRequest = addNumberToKeepInRequest;
this.backupKeepParamName = backupKeepParamName;
this.cmd = command;
}
BackupCommand(String backupName, String command) {
this.backupName = backupName;
addNumberToKeepInRequest = false;
this.cmd = command;
}
public void runCommand() {
String masterUrl;
if(backupName != null) {
masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd +
"&name=" + backupName;
} else {
masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd +
(addNumberToKeepInRequest ? "&" + backupKeepParamName + "=2" : "");
}
public static void runBackupCommand(JettySolrRunner masterJetty, String cmd, String params) throws IOException {
String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME
+ "/replication?command=" + cmd + params;
InputStream stream = null;
try {
URL url = new URL(masterUrl);
stream = url.openStream();
stream.close();
} catch (Exception e) {
fail = e.getMessage();
} finally {
IOUtils.closeQuietly(stream);
}
}
}
private class CheckDeleteBackupStatus {
@@ -349,6 +314,6 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase {
IOUtils.closeQuietly(stream);
}
return false;
};
}
}
}

View File

@@ -0,0 +1,243 @@
package org.apache.solr.handler;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrJettyTestBase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.util.FileUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@SolrTestCaseJ4.SuppressSSL // Currently unknown why SSL does not work with this test
public class TestRestoreCore extends SolrJettyTestBase {
JettySolrRunner masterJetty;
TestReplicationHandler.SolrInstance master = null;
SolrClient masterClient;
private static final String CONF_DIR = "solr" + File.separator + "collection1" + File.separator + "conf"
+ File.separator;
private static String context = "/solr";
private static JettySolrRunner createJetty(TestReplicationHandler.SolrInstance instance) throws Exception {
FileUtils.copyFile(new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"), new File(instance.getHomeDir(), "solr.xml"));
JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), "/solr", 0);
jetty.setDataDir(instance.getDataDir());
jetty.start();
return jetty;
}
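//Client for the test core; generous timeouts since backup/restore polling can be slow.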
private static SolrClient createNewSolrClient(int port) {
try {
// setup the client...
HttpSolrClient client = new HttpSolrClient(buildUrl(port, context) + "/" + DEFAULT_TEST_CORENAME);
client.setConnectionTimeout(15000);
client.setSoTimeout(60000);
client.setDefaultMaxConnectionsPerHost(100);
client.setMaxTotalConnections(100);
return client;
}
catch (Exception ex) {
throw new RuntimeException(ex);
}
}
@Override
@Before
public void setUp() throws Exception {
super.setUp();
String configFile = "solrconfig-master.xml";
master = new TestReplicationHandler.SolrInstance(createTempDir("solr-instance").toFile(), "master", null);
master.setUp();
master.copyConfigFile(CONF_DIR + configFile, "solrconfig.xml");
masterJetty = createJetty(master);
masterClient = createNewSolrClient(masterJetty.getLocalPort());
}
@Override
@After
public void tearDown() throws Exception {
super.tearDown();
masterClient.close();
masterClient = null;
masterJetty.stop();
masterJetty = null;
master = null;
}
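//Happy path: back up the index (randomly named/located snapshot), mutate it
//with deletes and adds, restore the snapshot, and expect the original docs back.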
@Test
public void testSimpleRestore() throws Exception {
int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
String snapshotName;
String location;
String params = "";
//Use the default backup location or an externally provided location.
if (random().nextBoolean()) {
location = createTempDir().toFile().getAbsolutePath();
params += "&location=" + URLEncoder.encode(location, "UTF-8");
}
//named snapshot vs default snapshot name
if (random().nextBoolean()) {
snapshotName = TestUtil.randomSimpleString(random(), 1, 5);
params += "&name=" + snapshotName;
}
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params);
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null);
while (!checkBackupStatus.success) {
checkBackupStatus.fetchStatus();
Thread.sleep(1000);
}
//Modify existing index before we call restore.
//Delete a few docs
int numDeletes = TestUtil.nextInt(random(), 1, nDocs);
for(int i=0; i<numDeletes; i++) {
masterClient.deleteByQuery("id:" + i);
}
masterClient.commit();
//Add a few more
int moreAdds = TestUtil.nextInt(random(), 1, 100);
for (int i=0; i<moreAdds; i++) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", i + nDocs);
doc.addField("name", "name = " + (i + nDocs));
masterClient.add(doc);
}
//Purposely skip the commit sometimes, so some docs may still be uncommitted when the restore runs
if (usually()) {
masterClient.commit();
}
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_RESTORE, params);
while (!fetchRestoreStatus()) {
Thread.sleep(1000);
}
//See if restore was successful by checking if all the docs are present again
verifyDocs(nDocs);
}
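//Failure path: corrupt the snapshot so the restore fails, then verify the
//core rolled back to the original index and can still be written to.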
@Test
public void testFailedRestore() throws Exception {
int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
String location = createTempDir().toFile().getAbsolutePath();
String snapshotName = TestUtil.randomSimpleString(random(), 1, 5);
String params = "&name=" + snapshotName + "&location=" + URLEncoder.encode(location, "UTF-8");
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params);
CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null);
while (!checkBackupStatus.success) {
checkBackupStatus.fetchStatus();
Thread.sleep(1000);
}
//Remove the segments_n file so that the backup index is corrupted.
//Restore should fail and it should automatically rollback to the original index.
Path restoreIndexPath = Paths.get(location, "snapshot." + snapshotName);
try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(restoreIndexPath, IndexFileNames.SEGMENTS + "*")) {
Path segmentFileName = dirStream.iterator().next();
Files.delete(segmentFileName);
}
TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_RESTORE, params);
boolean restoreFailed = false;
try {
while (!fetchRestoreStatus()) {
Thread.sleep(1000);
}
} catch (AssertionError e) {
//expected: fetchRestoreStatus trips an assertion once the restore reports a failure
restoreFailed = true;
}
assertTrue("Restore should have failed because the backup index is corrupted", restoreFailed);
verifyDocs(nDocs);
//make sure we can write to the index again
nDocs = TestReplicationHandlerBackup.indexDocs(masterClient);
verifyDocs(nDocs);
}
private void verifyDocs(int nDocs) throws SolrServerException, IOException {
ModifiableSolrParams queryParams = new ModifiableSolrParams();
queryParams.set("q", "*:*");
QueryResponse response = masterClient.query(queryParams);
assertEquals(0, response.getStatus());
assertEquals(nDocs, response.getResults().getNumFound());
}
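//Polls restorestatus once: true on "success", test failure on "failed" or a
//reported exception, false while the restore is still in progress.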
private boolean fetchRestoreStatus() throws IOException {
String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME +
"/replication?command=" + ReplicationHandler.CMD_RESTORE_STATUS;
final Pattern pException = Pattern.compile("<str name=\"exception\">(.*?)</str>");
InputStream stream = null;
try {
URL url = new URL(masterUrl);
stream = url.openStream();
String response = IOUtils.toString(stream, "UTF-8");
if(pException.matcher(response).find()) {
fail("Failed to complete restore action");
}
if(response.contains("<str name=\"status\">success</str>")) {
return true;
} else if (response.contains("<str name=\"status\">failed</str>")){
fail("Restore Failed");
}
stream.close();
} finally {
IOUtils.closeQuietly(stream);
}
return false;
}
}

View File

@ -1 +0,0 @@
0ec13f6423eb6d5858e229939a2bc118473ef94c

View File

@ -0,0 +1 @@
016d0bc512222f1253ee6b64d389c84e22f697f0

View File

@ -1 +0,0 @@
11393498b38e9695d0850cac26fde5613ae268b9

View File

@ -0,0 +1 @@
f5aa318bda4c6c8d688c9d00b90681dcd82ce636

View File

@ -1 +0,0 @@
f7899276dddd01d8a42ecfe27e7031fcf9824422

View File

@ -0,0 +1 @@
2f8757f5ac5e38f46c794e5229d1f3c522e9b1df