From 989cf865339175ad041f9f638c08408d59c1be50 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Thu, 2 Apr 2015 20:23:03 +0000
Subject: [PATCH 1/9] LUCENE-6386: correct javadocs about temp disk space
required for forceMerge(1)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670959 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/CHANGES.txt | 4 ++++
.../org/apache/lucene/index/IndexWriter.java | 17 +++++++++--------
.../lucene/index/TestIndexWriterForceMerge.java | 4 ++--
3 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0d8221dc884..0468133a323 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -123,6 +123,10 @@ Bug Fixes
DocumentsWriterStallControl to prevent hangs during indexing if we
miss a .notify/All somewhere (Mike McCandless)
+* LUCENE-6386: Correct IndexWriter.forceMerge documentation to state
+ that up to 3X (X = current index size) spare disk space may be needed
+ to complete forceMerge(1). (Robert Muir, Shai Erera, Mike McCandless)
+
Optimizations
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index a64be82aef2..47267682017 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -1547,14 +1547,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
* longer be changed).
*
   * <p>Note that this requires free space that is proportional
- * to the size of the index in your Directory (2X if you're
- * using compound file format). For example, if your index
- * size is 10 MB then you need an additional 10 MB free for
- * this to complete (20 MB if you're using compound file
- * format). This is also affected by the {@link Codec} that
- * is used to execute the merge, and may result in even a
- * bigger index. Also, it's best to call {@link #commit()}
- * afterwards, to allow IndexWriter to free up disk space.
+ * to the size of the index in your Directory: 2X if you are
+ * not using compound file format, and 3X if you are.
+ * For example, if your index size is 10 MB then you need
+ * an additional 20 MB free for this to complete (30 MB if
+ * you're using compound file format). This is also affected
+ * by the {@link Codec} that is used to execute the merge,
+ * and may result in even a bigger index. Also, it's best
+ * to call {@link #commit()} afterwards, to allow IndexWriter
+ * to free up disk space.
*
   * <p>If some but not all readers re-open while merging
* is underway, this will cause {@code > 2X} temporary
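
To illustrate the corrected contract, a minimal sketch of the pattern the
updated javadoc recommends: budget up to 3X the index size in spare disk
space, merge down to one segment, then commit so IndexWriter can drop the
pre-merge files. The path, analyzer, and class name are illustrative, not
part of the patch.

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    import java.nio.file.Paths;

    public class ForceMergeSketch {
      public static void main(String[] args) throws Exception {
        // Plan for up to 3X the current index size in free disk space
        // (2X without compound file format), per the corrected javadoc.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
          writer.forceMerge(1); // merge down to a single segment
          writer.commit();      // lets IndexWriter free the pre-merge segment files
        }
      }
    }
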
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java
index 9c219e4cadd..19030cc00a1 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java
@@ -199,8 +199,8 @@ public class TestIndexWriterForceMerge extends LuceneTestCase {
assertTrue("forceMerge used too much temporary space: starting usage was "
+ startDiskUsage + " bytes; final usage was " + finalDiskUsage
+ " bytes; max temp usage was " + maxDiskUsage
- + " but should have been " + (3 * maxStartFinalDiskUsage)
- + " (= 3X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 3 * maxStartFinalDiskUsage);
+ + " but should have been at most " + (4 * maxStartFinalDiskUsage)
+ + " (= 4X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 4 * maxStartFinalDiskUsage);
dir.close();
}
From 548edc5406aaab07ab633d5c15caec881e1ecca3 Mon Sep 17 00:00:00 2001
From: Areek Zillur
Date: Thu, 2 Apr 2015 21:17:34 +0000
Subject: [PATCH 2/9] LUCENE-6339: fix test bug (ensure opening nrt reader with
applyAllDeletes)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670972 13f79535-47bb-0310-9956-ffa450edef68
---
.../suggest/document/SuggestFieldTest.java | 38 +++++++++++++++----
1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/SuggestFieldTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/SuggestFieldTest.java
index 4c9d3a4c7dd..1bd442e82f1 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/SuggestFieldTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/SuggestFieldTest.java
@@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
@@ -57,11 +56,9 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import org.junit.After;
import org.junit.Before;
@@ -158,9 +155,10 @@ public class SuggestFieldTest extends LuceneTestCase {
weights[i] = Math.abs(random().nextLong());
document.add(newSuggestField("suggest_field", "abc", weights[i]));
iw.addDocument(document);
- }
- if (rarely()) {
- iw.commit();
+
+ if (usually()) {
+ iw.commit();
+ }
}
DirectoryReader reader = iw.getReader();
@@ -200,11 +198,15 @@ public class SuggestFieldTest extends LuceneTestCase {
}
iw.addDocument(document);
document.clear();
+
+ if (usually()) {
+ iw.commit();
+ }
}
iw.deleteDocuments(new Term("str_field", "delete"));
- DirectoryReader reader = DirectoryReader.open(iw, false);
+ DirectoryReader reader = DirectoryReader.open(iw, true);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", numLive);
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
@@ -224,6 +226,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newStringField("str_fld", "deleted", Field.Store.NO));
iw.addDocument(document);
document.clear();
+
+ if (usually()) {
+ iw.commit();
+ }
}
Filter filter = new QueryWrapperFilter(new TermsQuery("str_fld", new BytesRef("non_existent")));
@@ -249,11 +255,15 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newStringField("delete", "delete", Field.Store.NO));
iw.addDocument(document);
document.clear();
+
+ if (usually()) {
+ iw.commit();
+ }
}
iw.deleteDocuments(new Term("delete", "delete"));
- DirectoryReader reader = DirectoryReader.open(iw, false);
+ DirectoryReader reader = DirectoryReader.open(iw, true);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer);
TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", num);
assertThat(suggest.totalHits, equalTo(0));
@@ -274,6 +284,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(new IntField("weight_fld", i, Field.Store.YES));
iw.addDocument(document);
document.clear();
+
+ if (usually()) {
+ iw.commit();
+ }
}
iw.deleteDocuments(NumericRangeQuery.newIntRange("weight_fld", 2, null, true, false));
@@ -298,6 +312,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(new IntField("filter_int_fld", i, Field.Store.NO));
iw.addDocument(document);
document.clear();
+
+ if (usually()) {
+ iw.commit();
+ }
}
DirectoryReader reader = iw.getReader();
@@ -542,6 +560,10 @@ public class SuggestFieldTest extends LuceneTestCase {
document.add(newSuggestField("suggest_field", suggest, weight));
mappings.put(suggest, weight);
iw.addDocument(document);
+
+ if (usually()) {
+ iw.commit();
+ }
}
DirectoryReader reader = iw.getReader();
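
The fix above hinges on the applyAllDeletes argument of
DirectoryReader.open(IndexWriter, boolean). A minimal sketch of the corrected
call, assuming an already-open writer with buffered deletes:

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;

    import java.io.IOException;

    class NrtReaderSketch {
      // 'writer' is assumed to have pending deletes; sketch only.
      static DirectoryReader openCurrentView(IndexWriter writer) throws IOException {
        // applyAllDeletes=true resolves buffered deletes before the NRT reader
        // opens; with false (the old test code) deleted docs could still match.
        return DirectoryReader.open(writer, true);
      }
    }
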
From c4d9d6b3f10d4128b820804830abf8c9d44a9d3a Mon Sep 17 00:00:00 2001
From: Martijn van Groningen
Date: Thu, 2 Apr 2015 22:00:26 +0000
Subject: [PATCH 3/9] LUCENE-6352: Added a new query time join to the join
module that uses global ordinals, which is faster for subsequent joins
between reopens.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670990 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/CHANGES.txt | 4 +
.../search/join/BaseGlobalOrdinalScorer.java | 97 +++++
.../search/join/GlobalOrdinalsCollector.java | 114 ++++++
.../search/join/GlobalOrdinalsQuery.java | 245 ++++++++++++
.../GlobalOrdinalsWithScoreCollector.java | 250 ++++++++++++
.../join/GlobalOrdinalsWithScoreQuery.java | 256 ++++++++++++
.../apache/lucene/search/join/JoinUtil.java | 79 ++++
.../lucene/search/join/TestJoinUtil.java | 372 ++++++++++++++----
8 files changed, 1334 insertions(+), 83 deletions(-)
create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/BaseGlobalOrdinalScorer.java
create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java
create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java
create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java
create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0468133a323..4ef155e4551 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -45,6 +45,10 @@ New Features
faster intersection by avoiding loading positions in certain cases.
(Paul Elschot, Robert Muir via Mike McCandless)
+* LUCENE-6352: Added a new query time join to the join module that uses
+ global ordinals, which is faster for subsequent joins between reopens.
+ (Martijn van Groningen, Adrien Grand)
+
Optimizations
* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/BaseGlobalOrdinalScorer.java b/lucene/join/src/java/org/apache/lucene/search/join/BaseGlobalOrdinalScorer.java
new file mode 100644
index 00000000000..4d81d58f92a
--- /dev/null
+++ b/lucene/join/src/java/org/apache/lucene/search/join/BaseGlobalOrdinalScorer.java
@@ -0,0 +1,97 @@
+package org.apache.lucene.search.join;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.LongBitSet;
+
+import java.io.IOException;
+
+abstract class BaseGlobalOrdinalScorer extends Scorer {
+
+ final LongBitSet foundOrds;
+ final SortedDocValues values;
+ final Scorer approximationScorer;
+
+ float score;
+
+ public BaseGlobalOrdinalScorer(Weight weight, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
+ super(weight);
+ this.foundOrds = foundOrds;
+ this.values = values;
+ this.approximationScorer = approximationScorer;
+ }
+
+ @Override
+ public float score() throws IOException {
+ return score;
+ }
+
+ @Override
+ public int docID() {
+ return approximationScorer.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(approximationScorer.docID() + 1);
+ }
+
+ @Override
+ public TwoPhaseIterator asTwoPhaseIterator() {
+ final DocIdSetIterator approximation = new DocIdSetIterator() {
+ @Override
+ public int docID() {
+ return approximationScorer.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return approximationScorer.nextDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return approximationScorer.advance(target);
+ }
+
+ @Override
+ public long cost() {
+ return approximationScorer.cost();
+ }
+ };
+ return createTwoPhaseIterator(approximation);
+ }
+
+ @Override
+ public long cost() {
+ return approximationScorer.cost();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return 1;
+ }
+
+ protected abstract TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation);
+
+}
\ No newline at end of file
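
BaseGlobalOrdinalScorer defers the per-document ordinal check through
asTwoPhaseIterator. A hedged sketch of how a caller typically drives such an
iterator, assuming the 5.x TwoPhaseIterator API; the consumer interface is a
placeholder:

    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.search.TwoPhaseIterator;

    import java.io.IOException;

    class TwoPhaseConsumerSketch {
      interface MatchConsumer {
        void accept(int doc) throws IOException;
      }

      // Walks the cheap approximation and keeps only docs whose matches()
      // check passes, mirroring how the scorer's ordinal lookup is deferred.
      static void forEachMatch(TwoPhaseIterator twoPhase, MatchConsumer consumer) throws IOException {
        DocIdSetIterator approximation = twoPhase.approximation();
        for (int doc = approximation.nextDoc();
             doc != DocIdSetIterator.NO_MORE_DOCS;
             doc = approximation.nextDoc()) {
          if (twoPhase.matches()) { // the expensive per-doc confirmation
            consumer.accept(doc);
          }
        }
      }
    }
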
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java
new file mode 100644
index 00000000000..8a874621a79
--- /dev/null
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java
@@ -0,0 +1,114 @@
+package org.apache.lucene.search.join;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.LongValues;
+
+import java.io.IOException;
+
+/**
+ * A collector that collects all ordinals from a specified field matching the query.
+ *
+ * @lucene.experimental
+ */
+final class GlobalOrdinalsCollector implements Collector {
+
+ final String field;
+ final LongBitSet collectedOrds;
+ final MultiDocValues.OrdinalMap ordinalMap;
+
+ GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
+ this.field = field;
+ this.ordinalMap = ordinalMap;
+ this.collectedOrds = new LongBitSet(valueCount);
+ }
+
+ public LongBitSet getCollectorOrdinals() {
+ return collectedOrds;
+ }
+
+ @Override
+ public boolean needsScores() {
+ return false;
+ }
+
+ @Override
+ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+ SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
+ if (ordinalMap != null) {
+ LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
+ return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
+ } else {
+ return new SegmentOrdinalCollector(docTermOrds);
+ }
+ }
+
+ final class OrdinalMapCollector implements LeafCollector {
+
+ private final SortedDocValues docTermOrds;
+ private final LongValues segmentOrdToGlobalOrdLookup;
+
+ OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
+ this.docTermOrds = docTermOrds;
+ this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ final long segmentOrd = docTermOrds.getOrd(doc);
+ if (segmentOrd != -1) {
+ final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
+ collectedOrds.set(globalOrd);
+ }
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ }
+ }
+
+ final class SegmentOrdinalCollector implements LeafCollector {
+
+ private final SortedDocValues docTermOrds;
+
+ SegmentOrdinalCollector(SortedDocValues docTermOrds) {
+ this.docTermOrds = docTermOrds;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ final long segmentOrd = docTermOrds.getOrd(doc);
+ if (segmentOrd != -1) {
+ collectedOrds.set(segmentOrd);
+ }
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ }
+ }
+
+}
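
The two leaf collectors above differ only in whether a per-segment ordinal
must be translated into the global ordinal space. A small sketch of that
translation step in isolation; the class and method names are illustrative:

    import org.apache.lucene.index.MultiDocValues;
    import org.apache.lucene.util.LongValues;

    class GlobalOrdLookupSketch {
      // The extra step OrdinalMapCollector performs over SegmentOrdinalCollector:
      // map a segment-local ordinal to its global ordinal.
      static long toGlobalOrd(MultiDocValues.OrdinalMap ordinalMap, int leafOrd, int segmentOrd) {
        if (segmentOrd == -1) {
          return -1; // the document has no value for the join field
        }
        LongValues segmentToGlobal = ordinalMap.getGlobalOrds(leafOrd);
        return segmentToGlobal.get(segmentOrd);
      }
    }
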
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java
new file mode 100644
index 00000000000..a96881df1a9
--- /dev/null
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java
@@ -0,0 +1,245 @@
+package org.apache.lucene.search.join;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.ComplexExplanation;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.LongValues;
+
+import java.io.IOException;
+import java.util.Set;
+
+final class GlobalOrdinalsQuery extends Query {
+
+ // All the ords of matching docs found with OrdinalsCollector.
+ private final LongBitSet foundOrds;
+ private final String joinField;
+ private final MultiDocValues.OrdinalMap globalOrds;
+  // Is also an approximation of the docs that will match. Can be all docs that have the toField, or something more specific.
+ private final Query toQuery;
+
+ // just for hashcode and equals:
+ private final Query fromQuery;
+ private final IndexReader indexReader;
+
+ GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
+ this.foundOrds = foundOrds;
+ this.joinField = joinField;
+ this.globalOrds = globalOrds;
+ this.toQuery = toQuery;
+ this.fromQuery = fromQuery;
+ this.indexReader = indexReader;
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ return new W(this, toQuery.createWeight(searcher, false));
+ }
+
+ @Override
+ public void extractTerms(Set terms) {
+ fromQuery.extractTerms(terms);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ if (!super.equals(o)) return false;
+
+ GlobalOrdinalsQuery that = (GlobalOrdinalsQuery) o;
+
+ if (!fromQuery.equals(that.fromQuery)) return false;
+ if (!joinField.equals(that.joinField)) return false;
+ if (!toQuery.equals(that.toQuery)) return false;
+ if (!indexReader.equals(that.indexReader)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + joinField.hashCode();
+ result = 31 * result + toQuery.hashCode();
+ result = 31 * result + fromQuery.hashCode();
+ result = 31 * result + indexReader.hashCode();
+ return result;
+ }
+
+ @Override
+ public String toString(String field) {
+ return "GlobalOrdinalsQuery{" +
+ "joinField=" + joinField +
+ '}';
+ }
+
+ final class W extends Weight {
+
+ private final Weight approximationWeight;
+
+ private float queryNorm;
+ private float queryWeight;
+
+ W(Query query, Weight approximationWeight) {
+ super(query);
+ this.approximationWeight = approximationWeight;
+ }
+
+ @Override
+ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+ SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
+ if (values != null) {
+ int segmentOrd = values.getOrd(doc);
+ if (segmentOrd != -1) {
+ BytesRef joinValue = values.lookupOrd(segmentOrd);
+ return new ComplexExplanation(true, queryNorm, "Score based on join value " + joinValue.utf8ToString());
+ }
+ }
+ return new ComplexExplanation(false, 0.0f, "Not a match");
+ }
+
+ @Override
+ public float getValueForNormalization() throws IOException {
+ queryWeight = getBoost();
+ return queryWeight * queryWeight;
+ }
+
+ @Override
+ public void normalize(float norm, float topLevelBoost) {
+ this.queryNorm = norm * topLevelBoost;
+ queryWeight *= this.queryNorm;
+ }
+
+ @Override
+ public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
+ if (values == null) {
+ return null;
+ }
+
+ Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
+ if (approximationScorer == null) {
+ return null;
+ }
+ if (globalOrds != null) {
+ return new OrdinalMapScorer(this, queryNorm, foundOrds, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
+      } else {
+ return new SegmentOrdinalScorer(this, queryNorm, foundOrds, values, approximationScorer);
+ }
+ }
+
+ }
+
+ final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
+
+ final LongValues segmentOrdToGlobalOrdLookup;
+
+ public OrdinalMapScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
+ super(weight, foundOrds, values, approximationScorer);
+ this.score = score;
+ this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
+ final long segmentOrd = values.getOrd(docID);
+ if (segmentOrd != -1) {
+ final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
+ if (foundOrds.get(globalOrd)) {
+ return docID;
+ }
+ }
+ }
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
+ return new TwoPhaseIterator(approximation) {
+
+ @Override
+ public boolean matches() throws IOException {
+ final long segmentOrd = values.getOrd(approximationScorer.docID());
+ if (segmentOrd != -1) {
+ final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
+ if (foundOrds.get(globalOrd)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ };
+ }
+ }
+
+ final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
+
+ public SegmentOrdinalScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
+ super(weight, foundOrds, values, approximationScorer);
+ this.score = score;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
+ final long segmentOrd = values.getOrd(docID);
+ if (segmentOrd != -1) {
+ if (foundOrds.get(segmentOrd)) {
+ return docID;
+ }
+ }
+ }
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
+ return new TwoPhaseIterator(approximation) {
+
+ @Override
+ public boolean matches() throws IOException {
+ final long segmentOrd = values.getOrd(approximationScorer.docID());
+ if (segmentOrd != -1) {
+ if (foundOrds.get(segmentOrd)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ };
+ }
+
+ }
+}
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java
new file mode 100644
index 00000000000..37ac699280f
--- /dev/null
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java
@@ -0,0 +1,250 @@
+package org.apache.lucene.search.join;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.LongValues;
+
+import java.io.IOException;
+
+abstract class GlobalOrdinalsWithScoreCollector implements Collector {
+
+ final String field;
+ final MultiDocValues.OrdinalMap ordinalMap;
+ final LongBitSet collectedOrds;
+ protected final Scores scores;
+
+ GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
+ if (valueCount > Integer.MAX_VALUE) {
+      // We simply don't support more than Integer.MAX_VALUE ids.
+ throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
+ }
+ this.field = field;
+ this.ordinalMap = ordinalMap;
+ this.collectedOrds = new LongBitSet(valueCount);
+ this.scores = new Scores(valueCount);
+ }
+
+ public LongBitSet getCollectorOrdinals() {
+ return collectedOrds;
+ }
+
+ public float score(int globalOrdinal) {
+ return scores.getScore(globalOrdinal);
+ }
+
+ protected abstract void doScore(int globalOrd, float existingScore, float newScore);
+
+ @Override
+ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+ SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
+ if (ordinalMap != null) {
+ LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
+ return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
+ } else {
+ return new SegmentOrdinalCollector(docTermOrds);
+ }
+ }
+
+ @Override
+ public boolean needsScores() {
+ return true;
+ }
+
+ final class OrdinalMapCollector implements LeafCollector {
+
+ private final SortedDocValues docTermOrds;
+ private final LongValues segmentOrdToGlobalOrdLookup;
+ private Scorer scorer;
+
+ OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
+ this.docTermOrds = docTermOrds;
+ this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ final long segmentOrd = docTermOrds.getOrd(doc);
+ if (segmentOrd != -1) {
+ final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
+ collectedOrds.set(globalOrd);
+ float existingScore = scores.getScore(globalOrd);
+ float newScore = scorer.score();
+ doScore(globalOrd, existingScore, newScore);
+ }
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
+ }
+
+ final class SegmentOrdinalCollector implements LeafCollector {
+
+ private final SortedDocValues docTermOrds;
+ private Scorer scorer;
+
+ SegmentOrdinalCollector(SortedDocValues docTermOrds) {
+ this.docTermOrds = docTermOrds;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ final int segmentOrd = docTermOrds.getOrd(doc);
+ if (segmentOrd != -1) {
+ collectedOrds.set(segmentOrd);
+ float existingScore = scores.getScore(segmentOrd);
+ float newScore = scorer.score();
+ doScore(segmentOrd, existingScore, newScore);
+ }
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
+ }
+
+ static final class Max extends GlobalOrdinalsWithScoreCollector {
+
+ public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
+ super(field, ordinalMap, valueCount);
+ }
+
+ @Override
+ protected void doScore(int globalOrd, float existingScore, float newScore) {
+ scores.setScore(globalOrd, Math.max(existingScore, newScore));
+ }
+
+ }
+
+ static final class Sum extends GlobalOrdinalsWithScoreCollector {
+
+ public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
+ super(field, ordinalMap, valueCount);
+ }
+
+ @Override
+ protected void doScore(int globalOrd, float existingScore, float newScore) {
+ scores.setScore(globalOrd, existingScore + newScore);
+ }
+
+ }
+
+ static final class Avg extends GlobalOrdinalsWithScoreCollector {
+
+ private final Occurrences occurrences;
+
+ public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
+ super(field, ordinalMap, valueCount);
+ this.occurrences = new Occurrences(valueCount);
+ }
+
+ @Override
+ protected void doScore(int globalOrd, float existingScore, float newScore) {
+ occurrences.increment(globalOrd);
+ scores.setScore(globalOrd, existingScore + newScore);
+ }
+
+ @Override
+ public float score(int globalOrdinal) {
+ return scores.getScore(globalOrdinal) / occurrences.getOccurence(globalOrdinal);
+ }
+ }
+
+  // Because the global ordinal is directly used as a key to a score we should be somewhat smart about allocating
+  // the scores array. Most of the time not all docs match, so splitting the scores array up into blocks can prevent
+  // the creation of huge arrays. Also, working with smaller arrays is supposed to be more GC friendly.
+  //
+  // At first a hash map implementation would make sense, but in the case that more than half of the docs match,
+  // this becomes more expensive than just using an array.
+
+ // Maybe this should become a method parameter?
+ static final int arraySize = 4096;
+
+ static final class Scores {
+
+ final float[][] blocks;
+
+ private Scores(long valueCount) {
+ long blockSize = valueCount + arraySize - 1;
+ blocks = new float[(int) ((blockSize) / arraySize)][];
+ }
+
+ public void setScore(int globalOrdinal, float score) {
+ int block = globalOrdinal / arraySize;
+ int offset = globalOrdinal % arraySize;
+ float[] scores = blocks[block];
+ if (scores == null) {
+ blocks[block] = scores = new float[arraySize];
+ }
+ scores[offset] = score;
+ }
+
+ public float getScore(int globalOrdinal) {
+ int block = globalOrdinal / arraySize;
+ int offset = globalOrdinal % arraySize;
+ float[] scores = blocks[block];
+ float score;
+ if (scores != null) {
+ score = scores[offset];
+ } else {
+ score = 0f;
+ }
+ return score;
+ }
+
+ }
+
+ static final class Occurrences {
+
+ final int[][] blocks;
+
+ private Occurrences(long valueCount) {
+ long blockSize = valueCount + arraySize - 1;
+ blocks = new int[(int) (blockSize / arraySize)][];
+ }
+
+ public void increment(int globalOrdinal) {
+ int block = globalOrdinal / arraySize;
+ int offset = globalOrdinal % arraySize;
+ int[] occurrences = blocks[block];
+ if (occurrences == null) {
+ blocks[block] = occurrences = new int[arraySize];
+ }
+ occurrences[offset]++;
+ }
+
+ public int getOccurence(int globalOrdinal) {
+ int block = globalOrdinal / arraySize;
+ int offset = globalOrdinal % arraySize;
+ int[] occurrences = blocks[block];
+ return occurrences[offset];
+ }
+
+ }
+
+}
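
The comment on Scores/Occurrences above describes a lazily allocated block
layout keyed by global ordinal. A self-contained sketch of the same
block/offset arithmetic, reusing the patch's 4096 block size; the class name
is illustrative:

    // A global ordinal maps to (block, offset); a block's float[] is only
    // created the first time an ordinal inside it is written.
    class BlockedFloatsSketch {
      static final int ARRAY_SIZE = 4096; // same block size as the patch
      final float[][] blocks;

      BlockedFloatsSketch(long valueCount) {
        // ceil(valueCount / ARRAY_SIZE) blocks, allocated on demand.
        blocks = new float[(int) ((valueCount + ARRAY_SIZE - 1) / ARRAY_SIZE)][];
      }

      void set(int globalOrdinal, float value) {
        int block = globalOrdinal / ARRAY_SIZE;
        if (blocks[block] == null) {
          blocks[block] = new float[ARRAY_SIZE];
        }
        blocks[block][globalOrdinal % ARRAY_SIZE] = value;
      }

      float get(int globalOrdinal) {
        float[] block = blocks[globalOrdinal / ARRAY_SIZE];
        return block == null ? 0f : block[globalOrdinal % ARRAY_SIZE];
      }
    }
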
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
new file mode 100644
index 00000000000..f9d4df7418d
--- /dev/null
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
@@ -0,0 +1,256 @@
+package org.apache.lucene.search.join;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.ComplexExplanation;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LongValues;
+
+import java.io.IOException;
+import java.util.Set;
+
+final class GlobalOrdinalsWithScoreQuery extends Query {
+
+ private final GlobalOrdinalsWithScoreCollector collector;
+ private final String joinField;
+ private final MultiDocValues.OrdinalMap globalOrds;
+  // Is also an approximation of the docs that will match. Can be all docs that have the toField, or something more specific.
+ private final Query toQuery;
+
+ // just for hashcode and equals:
+ private final Query fromQuery;
+ private final IndexReader indexReader;
+
+ GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) {
+ this.collector = collector;
+ this.joinField = joinField;
+ this.globalOrds = globalOrds;
+ this.toQuery = toQuery;
+ this.fromQuery = fromQuery;
+ this.indexReader = indexReader;
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ return new W(this, toQuery.createWeight(searcher, false));
+ }
+
+ @Override
+ public void extractTerms(Set terms) {
+ fromQuery.extractTerms(terms);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ if (!super.equals(o)) return false;
+
+ GlobalOrdinalsWithScoreQuery that = (GlobalOrdinalsWithScoreQuery) o;
+
+ if (!fromQuery.equals(that.fromQuery)) return false;
+ if (!joinField.equals(that.joinField)) return false;
+ if (!toQuery.equals(that.toQuery)) return false;
+ if (!indexReader.equals(that.indexReader)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + joinField.hashCode();
+ result = 31 * result + toQuery.hashCode();
+ result = 31 * result + fromQuery.hashCode();
+ result = 31 * result + indexReader.hashCode();
+ return result;
+ }
+
+ @Override
+ public String toString(String field) {
+    return "GlobalOrdinalsWithScoreQuery{" +
+ "joinField=" + joinField +
+ '}';
+ }
+
+ final class W extends Weight {
+
+ private final Weight approximationWeight;
+
+ private float queryNorm;
+ private float queryWeight;
+
+ W(Query query, Weight approximationWeight) {
+ super(query);
+ this.approximationWeight = approximationWeight;
+ }
+
+ @Override
+ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+ SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
+ if (values != null) {
+ int segmentOrd = values.getOrd(doc);
+ if (segmentOrd != -1) {
+ final float score;
+ if (globalOrds != null) {
+ long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
+ score = collector.scores.getScore((int) globalOrd);
+ } else {
+ score = collector.score(segmentOrd);
+ }
+ BytesRef joinValue = values.lookupOrd(segmentOrd);
+ return new ComplexExplanation(true, score, "Score based on join value " + joinValue.utf8ToString());
+ }
+ }
+ return new ComplexExplanation(false, 0.0f, "Not a match");
+ }
+
+ @Override
+ public float getValueForNormalization() throws IOException {
+ queryWeight = getBoost();
+ return queryWeight * queryWeight;
+ }
+
+ @Override
+ public void normalize(float norm, float topLevelBoost) {
+ this.queryNorm = norm * topLevelBoost;
+ queryWeight *= this.queryNorm;
+ }
+
+ @Override
+ public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
+ if (values == null) {
+ return null;
+ }
+
+ Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
+ if (approximationScorer == null) {
+ return null;
+ } else if (globalOrds != null) {
+ return new OrdinalMapScorer(this, collector, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
+ } else {
+ return new SegmentOrdinalScorer(this, collector, values, approximationScorer);
+ }
+ }
+
+ }
+
+ final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
+
+ final LongValues segmentOrdToGlobalOrdLookup;
+ final GlobalOrdinalsWithScoreCollector collector;
+
+ public OrdinalMapScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
+ super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
+ this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
+ this.collector = collector;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
+ final long segmentOrd = values.getOrd(docID);
+ if (segmentOrd != -1) {
+ final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
+ if (foundOrds.get(globalOrd)) {
+ score = collector.score((int) globalOrd);
+ return docID;
+ }
+ }
+ }
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
+ return new TwoPhaseIterator(approximation) {
+
+ @Override
+ public boolean matches() throws IOException {
+ final long segmentOrd = values.getOrd(approximationScorer.docID());
+ if (segmentOrd != -1) {
+ final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd);
+ if (foundOrds.get(globalOrd)) {
+ score = collector.score((int) globalOrd);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ };
+ }
+ }
+
+ final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
+
+ final GlobalOrdinalsWithScoreCollector collector;
+
+ public SegmentOrdinalScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer) {
+ super(weight, collector.getCollectorOrdinals(), values, approximationScorer);
+ this.collector = collector;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
+ final int segmentOrd = values.getOrd(docID);
+ if (segmentOrd != -1) {
+ if (foundOrds.get(segmentOrd)) {
+ score = collector.score(segmentOrd);
+ return docID;
+ }
+ }
+ }
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) {
+ return new TwoPhaseIterator(approximation) {
+
+ @Override
+ public boolean matches() throws IOException {
+ final int segmentOrd = values.getOrd(approximationScorer.docID());
+ if (segmentOrd != -1) {
+ if (foundOrds.get(segmentOrd)) {
+ score = collector.score(segmentOrd);
+ return true;
+ }
+ }
+ return false;
+ }
+ };
+ }
+ }
+}
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
index 44abe7bc464..89ac5089797 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
@@ -17,7 +17,12 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import java.io.IOException;
@@ -90,4 +95,78 @@ public final class JoinUtil {
}
}
+ /**
+ * A query time join using global ordinals over a dedicated join field.
+ *
+ * This join has certain restrictions and requirements:
+   * 1) A document can only refer to one other document (but can be referred to by one or more documents).
+   * 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
+   *    that identifies the "from" and "to" side, and then the fromQuery and toQuery must take this into account.
+ * 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
+ * should store the join values as UTF-8 strings.
+ * 4) An ordinal map must be provided that is created on top of the join field.
+ *
+ * @param joinField The {@link org.apache.lucene.index.SortedDocValues} field containing the join values
+ * @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents.
+ * @param toQuery The query identifying all documents on the "to" side.
+ * @param searcher The index searcher used to execute the from query
+ * @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
+ * @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
+ * needs to be provided.
+ * @return a {@link Query} instance that can be used to join documents based on the join field
+ * @throws IOException If I/O related errors occur
+ */
+ public static Query createJoinQuery(String joinField,
+ Query fromQuery,
+ Query toQuery,
+ IndexSearcher searcher,
+ ScoreMode scoreMode,
+ MultiDocValues.OrdinalMap ordinalMap) throws IOException {
+ IndexReader indexReader = searcher.getIndexReader();
+ int numSegments = indexReader.leaves().size();
+ final long valueCount;
+ if (numSegments == 0) {
+ return new MatchNoDocsQuery();
+ } else if (numSegments == 1) {
+ // No need to use the ordinal map, because there is just one segment.
+ ordinalMap = null;
+ LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader();
+ SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField);
+ if (joinSortedDocValues != null) {
+ valueCount = joinSortedDocValues.getValueCount();
+ } else {
+ return new MatchNoDocsQuery();
+ }
+ } else {
+ if (ordinalMap == null) {
+ throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
+ }
+ valueCount = ordinalMap.getValueCount();
+ }
+
+ Query rewrittenFromQuery = searcher.rewrite(fromQuery);
+ if (scoreMode == ScoreMode.None) {
+ GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
+ searcher.search(fromQuery, globalOrdinalsCollector);
+ return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
+ }
+
+ GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector;
+ switch (scoreMode) {
+ case Total:
+ globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount);
+ break;
+ case Max:
+ globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount);
+ break;
+ case Avg:
+ globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount);
+ break;
+ default:
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
+ }
+ searcher.search(fromQuery, globalOrdinalsWithScoreCollector);
+ return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
+ }
+
}
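
End to end, the new API is used roughly as follows: build the OrdinalMap once
per (re)opened reader, then hand it to createJoinQuery. A sketch mirroring the
tests below; the field names and helper class are illustrative:

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.DocValues;
    import org.apache.lucene.index.MultiDocValues;
    import org.apache.lucene.index.SortedDocValues;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.join.JoinUtil;
    import org.apache.lucene.search.join.ScoreMode;
    import org.apache.lucene.util.packed.PackedInts;

    import java.io.IOException;

    class OrdinalJoinSketch {
      static TopDocs join(DirectoryReader reader) throws IOException {
        // Build the ordinal map over the shared join field once per reader.
        SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
        for (int i = 0; i < values.length; i++) {
          values[i] = DocValues.getSorted(reader.leaves().get(i).reader(), "join_field");
        }
        MultiDocValues.OrdinalMap ordinalMap =
            MultiDocValues.OrdinalMap.build(reader.getCoreCacheKey(), values, PackedInts.DEFAULT);

        IndexSearcher searcher = new IndexSearcher(reader);
        Query fromQuery = new TermQuery(new Term("type", "from")); // matches "from" docs
        Query toQuery = new TermQuery(new Term("type", "to"));     // identifies "to" docs
        Query joinQuery = JoinUtil.createJoinQuery(
            "join_field", fromQuery, toQuery, searcher, ScoreMode.None, ordinalMap);
        return searcher.search(joinQuery, 10);
      }
    }
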
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
index 46fa0c1979b..bb2fb5ff0ac 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
@@ -17,19 +17,6 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeSet;
-
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
@@ -38,27 +25,29 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
@@ -74,8 +63,22 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.packed.PackedInts;
import org.junit.Test;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception {
@@ -169,6 +172,180 @@ public class TestJoinUtil extends LuceneTestCase {
dir.close();
}
+ public void testSimpleOrdinalsJoin() throws Exception {
+ final String idField = "id";
+ final String productIdField = "productId";
+    // A field indicating to what type a document belongs, which is then used to distinguish between documents during joining.
+ final String typeField = "type";
+ // A single sorted doc values field that holds the join values for all document types.
+    // Typically during indexing, a schema will automatically create this field with the join values.
+ final String joinField = idField + productIdField;
+
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(
+ random(),
+ dir,
+ newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
+
+ // 0
+ Document doc = new Document();
+ doc.add(new TextField(idField, "1", Field.Store.NO));
+ doc.add(new TextField(typeField, "product", Field.Store.NO));
+ doc.add(new TextField("description", "random text", Field.Store.NO));
+ doc.add(new TextField("name", "name1", Field.Store.NO));
+ doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
+ w.addDocument(doc);
+
+ // 1
+ doc = new Document();
+ doc.add(new TextField(productIdField, "1", Field.Store.NO));
+ doc.add(new TextField(typeField, "price", Field.Store.NO));
+ doc.add(new TextField("price", "10.0", Field.Store.NO));
+ doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
+ w.addDocument(doc);
+
+ // 2
+ doc = new Document();
+ doc.add(new TextField(productIdField, "1", Field.Store.NO));
+ doc.add(new TextField(typeField, "price", Field.Store.NO));
+ doc.add(new TextField("price", "20.0", Field.Store.NO));
+ doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
+ w.addDocument(doc);
+
+ // 3
+ doc = new Document();
+ doc.add(new TextField(idField, "2", Field.Store.NO));
+ doc.add(new TextField(typeField, "product", Field.Store.NO));
+ doc.add(new TextField("description", "more random text", Field.Store.NO));
+ doc.add(new TextField("name", "name2", Field.Store.NO));
+ doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
+ w.addDocument(doc);
+ w.commit();
+
+ // 4
+ doc = new Document();
+ doc.add(new TextField(productIdField, "2", Field.Store.NO));
+ doc.add(new TextField(typeField, "price", Field.Store.NO));
+ doc.add(new TextField("price", "10.0", Field.Store.NO));
+ doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
+ w.addDocument(doc);
+
+ // 5
+ doc = new Document();
+ doc.add(new TextField(productIdField, "2", Field.Store.NO));
+ doc.add(new TextField(typeField, "price", Field.Store.NO));
+ doc.add(new TextField("price", "20.0", Field.Store.NO));
+ doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
+ w.addDocument(doc);
+
+ IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
+ w.close();
+
+ IndexReader r = indexSearcher.getIndexReader();
+ SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
+ for (int i = 0; i < values.length; i++) {
+ LeafReader leafReader = r.leaves().get(i).reader();
+ values[i] = DocValues.getSorted(leafReader, joinField);
+ }
+ MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
+ r.getCoreCacheKey(), values, PackedInts.DEFAULT
+ );
+
+ Query toQuery = new TermQuery(new Term(typeField, "price"));
+ Query fromQuery = new TermQuery(new Term("name", "name2"));
+ // Search for product and return prices
+ Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
+ TopDocs result = indexSearcher.search(joinQuery, 10);
+ assertEquals(2, result.totalHits);
+ assertEquals(4, result.scoreDocs[0].doc);
+ assertEquals(5, result.scoreDocs[1].doc);
+
+ fromQuery = new TermQuery(new Term("name", "name1"));
+ joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
+ result = indexSearcher.search(joinQuery, 10);
+ assertEquals(2, result.totalHits);
+ assertEquals(1, result.scoreDocs[0].doc);
+ assertEquals(2, result.scoreDocs[1].doc);
+
+ // Search for prices and return products
+ fromQuery = new TermQuery(new Term("price", "20.0"));
+ toQuery = new TermQuery(new Term(typeField, "product"));
+ joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
+ result = indexSearcher.search(joinQuery, 10);
+ assertEquals(2, result.totalHits);
+ assertEquals(0, result.scoreDocs[0].doc);
+ assertEquals(3, result.scoreDocs[1].doc);
+
+ indexSearcher.getIndexReader().close();
+ dir.close();
+ }
+
+ public void testRandomOrdinalsJoin() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(
+ random(),
+ dir,
+ newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
+ );
+ IndexIterationContext context = createContext(100, w, false, true);
+
+ w.forceMerge(1);
+
+ w.close();
+ IndexReader topLevelReader = DirectoryReader.open(dir);
+
+ SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
+ for (LeafReaderContext leadContext : topLevelReader.leaves()) {
+ values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
+ }
+ context.ordinalMap = MultiDocValues.OrdinalMap.build(
+ topLevelReader.getCoreCacheKey(), values, PackedInts.DEFAULT
+ );
+ IndexSearcher indexSearcher = newSearcher(topLevelReader);
+
+ int r = random().nextInt(context.randomUniqueValues.length);
+ boolean from = context.randomFrom[r];
+ String randomValue = context.randomUniqueValues[r];
+ BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
+
+ final Query actualQuery = new TermQuery(new Term("value", randomValue));
+ if (VERBOSE) {
+ System.out.println("actualQuery=" + actualQuery);
+ }
+ final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
+ if (VERBOSE) {
+ System.out.println("scoreMode=" + scoreMode);
+ }
+
+ final Query joinQuery;
+ if (from) {
+ BooleanQuery fromQuery = new BooleanQuery();
+ fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
+ fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
+ Query toQuery = new TermQuery(new Term("type", "to"));
+ joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
+ } else {
+ BooleanQuery fromQuery = new BooleanQuery();
+ fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
+ fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
+ Query toQuery = new TermQuery(new Term("type", "from"));
+ joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
+ }
+ if (VERBOSE) {
+ System.out.println("joinQuery=" + joinQuery);
+ }
+
+ final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
+ final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
+ indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
+ assertBitSet(expectedResult, actualResult, indexSearcher);
+ TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
+ TopDocs actualTopDocs = topScoreDocCollector.topDocs();
+ assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
+ topLevelReader.close();
+ dir.close();
+ }
+
// TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
public void testOverflowTermsWithScoreCollector() throws Exception {
test300spartans(true, ScoreMode.Avg);
@@ -218,7 +395,7 @@ public class TestJoinUtil extends LuceneTestCase {
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(1, result.totalHits);
assertEquals(0, result.scoreDocs[0].doc);
-
+
indexSearcher.getIndexReader().close();
dir.close();
@@ -310,7 +487,7 @@ public class TestJoinUtil extends LuceneTestCase {
assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive);
}
}
-
+
@Override
public boolean needsScores() {
return false;
@@ -448,7 +625,7 @@ public class TestJoinUtil extends LuceneTestCase {
dir,
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
);
- IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument);
+ IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, false);
IndexReader topLevelReader = w.getReader();
w.close();
@@ -485,73 +662,64 @@ public class TestJoinUtil extends LuceneTestCase {
// Need to know all documents that have matches. TopDocs doesn't give me that, and then I'd also be testing TopDocsCollector...
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
- indexSearcher.search(joinQuery, new Collector() {
-
- @Override
- public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
- final int docBase = context.docBase;
- final LeafCollector in = topScoreDocCollector.getLeafCollector(context);
- return new FilterLeafCollector(in) {
-
- @Override
- public void collect(int doc) throws IOException {
- super.collect(doc);
- actualResult.set(doc + docBase);
- }
- };
- }
-
- @Override
- public boolean needsScores() {
- return topScoreDocCollector.needsScores();
- }
- });
+ indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
// Asserting bit set...
- if (VERBOSE) {
- System.out.println("expected cardinality:" + expectedResult.cardinality());
- DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());
- for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
- System.out.println(String.format(Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
- }
- System.out.println("actual cardinality:" + actualResult.cardinality());
- iterator = new BitSetIterator(actualResult, actualResult.cardinality());
- for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
- System.out.println(String.format(Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
- }
- }
- assertEquals(expectedResult, actualResult);
-
+ assertBitSet(expectedResult, actualResult, indexSearcher);
// Asserting TopDocs...
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
- assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
- assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
- if (scoreMode == ScoreMode.None) {
- continue;
- }
-
- assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
- for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
- if (VERBOSE) {
- System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
- System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score);
- }
- assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
- assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
- Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
- assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
- }
+ assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
}
topLevelReader.close();
dir.close();
}
}
- private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument) throws IOException {
- return createContext(nDocs, writer, writer, multipleValuesPerDocument);
+ private void assertBitSet(BitSet expectedResult, BitSet actualResult, IndexSearcher indexSearcher) throws IOException {
+ if (VERBOSE) {
+ System.out.println("expected cardinality:" + expectedResult.cardinality());
+ DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());
+ for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
+ System.out.println(String.format(Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
+ }
+ System.out.println("actual cardinality:" + actualResult.cardinality());
+ iterator = new BitSetIterator(actualResult, actualResult.cardinality());
+ for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
+ System.out.println(String.format(Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
+ }
+ }
+ assertEquals(expectedResult, actualResult);
}
- private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument) throws IOException {
+ private void assertTopDocs(TopDocs expectedTopDocs, TopDocs actualTopDocs, ScoreMode scoreMode, IndexSearcher indexSearcher, Query joinQuery) throws IOException {
+ assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
+ assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
+ if (scoreMode == ScoreMode.None) {
+ return;
+ }
+
+ assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
+ for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
+ if (VERBOSE) {
+ System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
+ System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score);
+ }
+ assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
+ assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
+ Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
+ assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
+ }
+ }
+
+ private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument, boolean ordinalJoin) throws IOException {
+ return createContext(nDocs, writer, writer, multipleValuesPerDocument, ordinalJoin);
+ }
+
+ private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
+ if (globalOrdinalJoin) {
+ assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
+ }
+
IndexIterationContext context = new IndexIterationContext();
int numRandomValues = nDocs / 2;
context.randomUniqueValues = new String[numRandomValues];
@@ -560,8 +728,8 @@ public class TestJoinUtil extends LuceneTestCase {
for (int i = 0; i < numRandomValues; i++) {
String uniqueRandomValue;
do {
- uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
-// uniqueRandomValue = _TestUtil.randomSimpleString(random);
+// uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random());
+ uniqueRandomValue = TestUtil.randomSimpleString(random());
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
// Keep generating until the value is unique; empty strings aren't allowed.
trackSet.add(uniqueRandomValue);
@@ -581,15 +749,18 @@ public class TestJoinUtil extends LuceneTestCase {
boolean from = context.randomFrom[randomI];
int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
+ if (globalOrdinalJoin) {
+ document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
+ }
for (int j = 0; j < numberOfLinkValues; j++) {
String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
docs[i].linkValues.add(linkValue);
if (from) {
if (!context.fromDocuments.containsKey(linkValue)) {
- context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
+ context.fromDocuments.put(linkValue, new ArrayList<>());
}
if (!context.randomValueFromDocs.containsKey(value)) {
- context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
+ context.randomValueFromDocs.put(value, new ArrayList<>());
}
context.fromDocuments.get(linkValue).add(docs[i]);
@@ -600,12 +771,15 @@ public class TestJoinUtil extends LuceneTestCase {
} else {
document.add(new SortedDocValuesField("from", new BytesRef(linkValue)));
}
+ if (globalOrdinalJoin) {
+ document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
+ }
} else {
if (!context.toDocuments.containsKey(linkValue)) {
- context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
+ context.toDocuments.put(linkValue, new ArrayList<>());
}
if (!context.randomValueToDocs.containsKey(value)) {
- context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
+ context.randomValueToDocs.put(value, new ArrayList<>());
}
context.toDocuments.get(linkValue).add(docs[i]);
@@ -616,6 +790,9 @@ public class TestJoinUtil extends LuceneTestCase {
} else {
document.add(new SortedDocValuesField("to", new BytesRef(linkValue)));
}
+ if (globalOrdinalJoin) {
+ document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
+ }
}
}
@@ -707,6 +884,9 @@ public class TestJoinUtil extends LuceneTestCase {
if (joinScore == null) {
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
}
+ if (VERBOSE) {
+ System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
+ }
joinScore.addScore(scorer.score());
}
@@ -720,7 +900,7 @@ public class TestJoinUtil extends LuceneTestCase {
public void setScorer(Scorer scorer) {
this.scorer = scorer;
}
-
+
@Override
public boolean needsScores() {
return true;
@@ -777,7 +957,7 @@ public class TestJoinUtil extends LuceneTestCase {
@Override
public void setScorer(Scorer scorer) {}
-
+
@Override
public boolean needsScores() {
return false;
@@ -875,6 +1055,7 @@ public class TestJoinUtil extends LuceneTestCase {
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();
+ MultiDocValues.OrdinalMap ordinalMap;
}
private static class RandomDoc {
@@ -922,4 +1103,29 @@ public class TestJoinUtil extends LuceneTestCase {
}
+ private static class BitSetCollector extends SimpleCollector {
+
+ private final BitSet bitSet;
+ private int docBase;
+
+ private BitSetCollector(BitSet bitSet) {
+ this.bitSet = bitSet;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ bitSet.set(docBase + doc);
+ }
+
+ @Override
+ protected void doSetNextReader(LeafReaderContext context) throws IOException {
+ docBase = context.docBase;
+ }
+
+ @Override
+ public boolean needsScores() {
+ return false;
+ }
+ }
+
}
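
Note for reviewers: the testRandomOrdinalsJoin added above exercises the new
global-ordinal variant of JoinUtil.createJoinQuery. A minimal sketch of the
call sequence, assuming an index whose "from" and "to" documents share a
SortedDocValues "join_field" as in the test (the reader/searcher variables and
the term values are illustrative, not part of the patch):

    // Build one OrdinalMap across all segments of the top-level reader.
    SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
    for (LeafReaderContext leaf : reader.leaves()) {
      values[leaf.ord] = DocValues.getSorted(leaf.reader(), "join_field");
    }
    MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
        reader.getCoreCacheKey(), values, PackedInts.DEFAULT);

    // Join from documents matching fromQuery to the "to" side through the
    // shared join_field ordinals; ScoreMode.None skips score aggregation.
    Query fromQuery = new TermQuery(new Term("price", "20.0"));
    Query toQuery = new TermQuery(new Term("type", "product"));
    Query joinQuery = JoinUtil.createJoinQuery(
        "join_field", fromQuery, toQuery, searcher, ScoreMode.None, ordinalMap);
    TopDocs hits = searcher.search(joinQuery, 10);
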
From 79bf72708b94c01691027c9a30364b96a4705ccf Mon Sep 17 00:00:00 2001
From: Varun Thacker
Date: Fri, 3 Apr 2015 09:46:54 +0000
Subject: [PATCH 4/9] SOLR-6637: Solr should have a way to restore a core
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671022 13f79535-47bb-0310-9956-ffa450edef68
---
solr/CHANGES.txt | 3 +
.../org/apache/solr/handler/IndexFetcher.java | 101 ++++----
.../solr/handler/OldBackupDirectory.java | 50 ++++
.../solr/handler/ReplicationHandler.java | 149 +++++++++--
.../org/apache/solr/handler/RestoreCore.java | 149 +++++++++++
.../org/apache/solr/handler/SnapShooter.java | 35 +--
.../handler/TestReplicationHandlerBackup.java | 77 ++----
.../apache/solr/handler/TestRestoreCore.java | 243 ++++++++++++++++++
8 files changed, 646 insertions(+), 161 deletions(-)
create mode 100644 solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java
create mode 100644 solr/core/src/java/org/apache/solr/handler/RestoreCore.java
create mode 100644 solr/core/src/test/org/apache/solr/handler/TestRestoreCore.java
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index fc5df697f80..215b78e0de3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -78,6 +78,9 @@ Detailed Change List
New Features
----------------------
+* SOLR-6637: Solr should have a way to restore a core from a backed up index.
+ (Varun Thacker, noble, shalin)
+
Bug Fixes
----------------------
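
Note for reviewers: the restore commands are exposed through the existing
/replication request handler. Typical invocations would look like the
following (host, port and core name are illustrative, and this assumes
CMD_RESTORE and CMD_RESTORE_STATUS map to "restore" and "restorestatus"):

    http://localhost:8983/solr/core1/replication?command=restore&name=mybackup
    http://localhost:8983/solr/core1/replication?command=restorestatus

Both parameters of restore are optional: when location is omitted the core's
data directory is searched, and when name is omitted the newest
snapshot.<timestamp> directory found there is restored (see
ReplicationHandler.restore() below).
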
diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
index 605ba929ec3..1983282ea89 100644
--- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
+++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
@@ -29,6 +29,7 @@ import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
+import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
@@ -246,31 +247,31 @@ public class IndexFetcher {
}
}
- boolean fetchLatestIndex(final SolrCore core, boolean forceReplication) throws IOException, InterruptedException {
- return fetchLatestIndex(core, forceReplication, false);
+ boolean fetchLatestIndex(boolean forceReplication) throws IOException, InterruptedException {
+ return fetchLatestIndex(forceReplication, false);
}
/**
* This command downloads all the necessary files from master to install an index commit point. Only changed files are
* downloaded. It also downloads the conf files (if they are modified).
*
- * @param core the SolrCore
* @param forceReplication force a replication in all cases
* @param forceCoreReload force a core reload in all cases
* @return true on success, false if slave is already in sync
* @throws IOException if an exception occurs
*/
- boolean fetchLatestIndex(final SolrCore core, boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {
+ boolean fetchLatestIndex(boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException {
+
boolean cleanupDone = false;
boolean successfulInstall = false;
replicationStartTime = System.currentTimeMillis();
Directory tmpIndexDir = null;
- String tmpIndex = null;
+ String tmpIndex;
Directory indexDir = null;
- String indexDirPath = null;
+ String indexDirPath;
boolean deleteTmpIdxDir = true;
- if (!core.getSolrCoreState().getLastReplicateIndexSuccess()) {
+ if (!solrCore.getSolrCoreState().getLastReplicateIndexSuccess()) {
// if the last replication was not a success, we force a full replication
// when we are a bit more confident we may want to try a partial replication
// if the error is connection related or something, but we have to be careful
@@ -279,7 +280,7 @@ public class IndexFetcher {
try {
// get the current 'replicatable' index version on the master
- NamedList response = null;
+ NamedList response;
try {
response = getLatestVersion();
} catch (Exception e) {
@@ -290,12 +291,12 @@ public class IndexFetcher {
long latestGeneration = (Long) response.get(GENERATION);
// TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
- IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
+ IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit();
if (commit == null) {
// Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points
RefCounted<SolrIndexSearcher> searcherRefCounted = null;
try {
- searcherRefCounted = core.getNewestSearcher(false);
+ searcherRefCounted = solrCore.getNewestSearcher(false);
if (searcherRefCounted == null) {
LOG.warn("No open searcher found - fetch aborted");
return false;
@@ -312,15 +313,14 @@ public class IndexFetcher {
if (forceReplication && commit.getGeneration() != 0) {
// since we won't get the files for an empty index,
// we just clear ours and commit
- RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core);
+ RefCounted<IndexWriter> iw = solrCore.getUpdateHandler().getSolrCoreState().getIndexWriter(solrCore);
try {
iw.get().deleteAll();
} finally {
iw.decref();
}
- SolrQueryRequest req = new LocalSolrQueryRequest(core,
- new ModifiableSolrParams());
- core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
+ SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams());
+ solrCore.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
}
//there is nothing to be replicated
@@ -340,7 +340,9 @@ public class IndexFetcher {
// get the list of files first
fetchFileList(latestGeneration);
// this can happen if the commit point is deleted before we fetch the file list.
- if(filesToDownload.isEmpty()) return false;
+ if (filesToDownload.isEmpty()) {
+ return false;
+ }
LOG.info("Number of files in latest index in master: " + filesToDownload.size());
// Create the sync service
@@ -354,13 +356,13 @@ public class IndexFetcher {
|| commit.getGeneration() >= latestGeneration || forceReplication;
String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
- tmpIndex = createTempindexDir(core, tmpIdxDirName);
+ tmpIndex = Paths.get(solrCore.getDataDir(), tmpIdxDirName).toString();
- tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
+ tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
// current index dir...
- indexDirPath = core.getIndexDir();
- indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
+ indexDirPath = solrCore.getIndexDir();
+ indexDir = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
try {
@@ -404,7 +406,7 @@ public class IndexFetcher {
} finally {
writer.decref();
}
- solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true);
+ solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(solrCore, true);
}
boolean reloadCore = false;
@@ -422,7 +424,7 @@ public class IndexFetcher {
reloadCore = true;
downloadConfFiles(confFilesToDownload, latestGeneration);
if (isFullCopyNeeded) {
- successfulInstall = modifyIndexProps(tmpIdxDirName);
+ successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
deleteTmpIdxDir = false;
} else {
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
@@ -433,8 +435,8 @@ public class IndexFetcher {
// may be closed
if (indexDir != null) {
LOG.info("removing old index directory " + indexDir);
- core.getDirectoryFactory().doneWithDirectory(indexDir);
- core.getDirectoryFactory().remove(indexDir);
+ solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
+ solrCore.getDirectoryFactory().remove(indexDir);
}
}
@@ -446,7 +448,7 @@ public class IndexFetcher {
} else {
terminateAndWaitFsyncService();
if (isFullCopyNeeded) {
- successfulInstall = modifyIndexProps(tmpIdxDirName);
+ successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName);
deleteTmpIdxDir = false;
} else {
successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
@@ -458,13 +460,13 @@ public class IndexFetcher {
}
} finally {
if (!isFullCopyNeeded) {
- solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core);
+ solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(solrCore);
}
}
// we must reload the core after we open the IW back up
if (successfulInstall && (reloadCore || forceCoreReload)) {
- LOG.info("Reloading SolrCore {}", core.getName());
+ LOG.info("Reloading SolrCore {}", solrCore.getName());
reloadCore();
}
@@ -474,8 +476,8 @@ public class IndexFetcher {
// may be closed
if (indexDir != null) {
LOG.info("removing old index directory " + indexDir);
- core.getDirectoryFactory().doneWithDirectory(indexDir);
- core.getDirectoryFactory().remove(indexDir);
+ solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
+ solrCore.getDirectoryFactory().remove(indexDir);
}
}
if (isFullCopyNeeded) {
@@ -486,13 +488,13 @@ public class IndexFetcher {
}
if (!isFullCopyNeeded && !forceReplication && !successfulInstall) {
- cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
+ cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
cleanupDone = true;
// we try with a full copy of the index
LOG.warn(
"Replication attempt was not successful - trying a full index replication reloadCore={}",
reloadCore);
- successfulInstall = fetchLatestIndex(core, true, reloadCore);
+ successfulInstall = fetchLatestIndex(true, reloadCore);
}
replicationStartTime = 0;
@@ -505,15 +507,15 @@ public class IndexFetcher {
} catch (InterruptedException e) {
throw new InterruptedException("Index fetch interrupted");
} catch (Exception e) {
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
}
} finally {
if (!cleanupDone) {
- cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
+ cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall);
}
}
}
-
+
private void cleanup(final SolrCore core, Directory tmpIndexDir,
Directory indexDir, boolean deleteTmpIdxDir, boolean successfulInstall) throws IOException {
try {
@@ -524,9 +526,9 @@ public class IndexFetcher {
LOG.error("caught", e);
}
}
-
+
core.getUpdateHandler().getSolrCoreState().setLastReplicateIndexSuccess(successfulInstall);
-
+
filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = null;
replicationStartTime = 0;
dirFileFetcher = null;
@@ -545,11 +547,11 @@ public class IndexFetcher {
SolrException.log(LOG, "Error removing directory " + tmpIndexDir, e);
}
}
-
+
if (tmpIndexDir != null) {
core.getDirectoryFactory().release(tmpIndexDir);
}
-
+
if (indexDir != null) {
core.getDirectoryFactory().release(indexDir);
}
@@ -719,15 +721,6 @@ public class IndexFetcher {
}
- /**
- * All the files are copied to a temp dir first
- */
- private String createTempindexDir(SolrCore core, String tmpIdxDirName) {
- // TODO: there should probably be a DirectoryFactory#concatPath(parent, name)
- // or something
- return core.getDataDir() + tmpIdxDirName;
- }
-
private void reloadCore() {
final CountDownLatch latch = new CountDownLatch(1);
new Thread() {
@@ -815,12 +808,12 @@ public class IndexFetcher {
|| filename.startsWith("segments_") || size < _100K);
}
- static class CompareResult {
+ protected static class CompareResult {
boolean equal = false;
boolean checkSummed = false;
}
-
- private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
+
+ protected static CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
CompareResult compareResult = new CompareResult();
try {
try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
@@ -887,8 +880,8 @@ public class IndexFetcher {
}
/**
- * All the files which are common between master and slave must have same size else we assume they are
- * not compatible (stale).
+ * All the files which are common between master and slave must have the same size and the same checksum;
+ * otherwise we assume they are not compatible (stale).
*
* @return true if the index is stale and we need to download a fresh copy, false otherwise.
* @throws IOException if low level io error
@@ -1034,7 +1027,7 @@ public class IndexFetcher {
/**
* If the index is stale by any chance, load index from a different dir in the data dir.
*/
- private boolean modifyIndexProps(String tmpIdxDirName) {
+ protected static boolean modifyIndexProps(SolrCore solrCore, String tmpIdxDirName) {
LOG.info("New index installed. Updating index properties... index="+tmpIdxDirName);
Properties p = new Properties();
Directory dir = null;
@@ -1042,7 +1035,7 @@ public class IndexFetcher {
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType);
if (slowFileExists(dir, IndexFetcher.INDEX_PROPERTIES)){
final IndexInput input = dir.openInput(IndexFetcher.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);
-
+
final InputStream is = new PropertiesInputStream(input);
try {
p.load(new InputStreamReader(is, StandardCharsets.UTF_8));
@@ -1083,7 +1076,7 @@ public class IndexFetcher {
}
}
}
-
+
}
private final Map<String, FileInfo> confFileInfoCache = new HashMap<>();
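
Note for reviewers: compareFile() is made protected static above so that the
new RestoreCore can reuse it to verify copied files. A minimal sketch of the
length-plus-checksum comparison it performs, assuming the file has a Lucene
codec footer (variable names are illustrative):

    try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
      long indexFileLen = indexInput.length();
      // Reads the CRC32 checksum stored in the file's codec footer.
      long indexFileChecksum = CodecUtil.retrieveChecksum(indexInput);
      compareResult.equal = indexFileLen == backupIndexFileLen.longValue()
          && indexFileChecksum == backupIndexFileChecksum.longValue();
      compareResult.checkSummed = true;
    }

The real method must also handle files written without a checksum footer, in
which case only the lengths can be compared.
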
diff --git a/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java b/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java
new file mode 100644
index 00000000000..c4b86c7eddf
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java
@@ -0,0 +1,50 @@
+package org.apache.solr.handler;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+class OldBackupDirectory implements Comparable<OldBackupDirectory> {
+ File dir;
+ Date timestamp;
+ private final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
+
+ OldBackupDirectory(File dir) {
+ if (dir.isDirectory()) {
+ Matcher m = dirNamePattern.matcher(dir.getName());
+ if (m.find()) {
+ try {
+ this.dir = dir;
+ this.timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).parse(m.group(1));
+ } catch (Exception e) {
+ this.dir = null;
+ this.timestamp = null;
+ }
+ }
+ }
+ }
+ @Override
+ public int compareTo(OldBackupDirectory that) {
+ return that.timestamp.compareTo(this.timestamp);
+ }
+}
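
Note for reviewers: compareTo() above is intentionally inverted (it compares
that.timestamp to this.timestamp) so that sorting a list of OldBackupDirectory
instances orders them newest first. A minimal sketch of how
ReplicationHandler.restore() below relies on this (the path is illustrative):

    File[] files = new File("/var/solr/data").listFiles();
    List<OldBackupDirectory> dirs = new ArrayList<>();
    for (File file : files) {
      OldBackupDirectory obd = new OldBackupDirectory(file);
      if (obd.dir != null) { // name matched "snapshot.<timestamp>" and parsed
        dirs.add(obd);
      }
    }
    Collections.sort(dirs); // newest first, thanks to the inverted compareTo
    String newestSnapshot = dirs.isEmpty() ? null : dirs.get(0).dir.getName();
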
diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
index 3c49448e661..fdae8f90949 100644
--- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
@@ -36,7 +36,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -146,6 +148,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
private ReentrantLock indexFetchLock = new ReentrantLock();
+ private ExecutorService restoreExecutor = Executors.newSingleThreadExecutor(
+ new DefaultSolrThreadFactory("restoreExecutor"));
+
+ private volatile Future<Boolean> restoreFuture;
+
+ private volatile String currentRestoreName;
+
private String includeConfFiles;
private NamedList<String> confFileNameAlias = new NamedList<>();
@@ -205,13 +214,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
// It gives the current 'replicatable' index version
if (command.equals(CMD_INDEX_VERSION)) {
IndexCommit commitPoint = indexCommitPoint; // make a copy so it won't change
-
+
if (commitPoint == null) {
// if this handler is 'lazy', we may not have tracked the last commit
// because our commit listener is registered on inform
commitPoint = core.getDeletionPolicy().getLatestCommit();
}
-
+
if (commitPoint != null && replicationEnabled.get()) {
//
// There is a race condition here. The commit point may be changed / deleted by the time
@@ -235,6 +244,11 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} else if (command.equalsIgnoreCase(CMD_BACKUP)) {
doSnapShoot(new ModifiableSolrParams(solrParams), rsp, req);
rsp.add(STATUS, OK_STATUS);
+ } else if (command.equalsIgnoreCase(CMD_RESTORE)) {
+ restore(new ModifiableSolrParams(solrParams), rsp, req);
+ rsp.add(STATUS, OK_STATUS);
+ } else if (command.equalsIgnoreCase(CMD_RESTORE_STATUS)) {
+ rsp.add(CMD_RESTORE_STATUS, getRestoreStatus());
} else if (command.equalsIgnoreCase(CMD_DELETE_BACKUP)) {
deleteSnapshot(new ModifiableSolrParams(solrParams));
rsp.add(STATUS, OK_STATUS);
@@ -302,7 +316,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
}
- SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME));
+ SnapShooter snapShooter = new SnapShooter(core, params.get(LOCATION), params.get(NAME));
snapShooter.validateDeleteSnapshot();
snapShooter.deleteSnapAsync(this);
}
@@ -361,7 +375,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} else {
currentIndexFetcher = pollingIndexFetcher;
}
- return currentIndexFetcher.fetchLatestIndex(core, forceReplication);
+ return currentIndexFetcher.fetchLatestIndex(forceReplication);
} catch (Exception e) {
SolrException.log(LOG, "Index fetch failed ", e);
} finally {
@@ -377,6 +391,72 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
return indexFetchLock.isLocked();
}
+ private void restore(SolrParams params, SolrQueryResponse rsp, SolrQueryRequest req) {
+ if (restoreFuture != null && !restoreFuture.isDone()) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "Restore in progress. Cannot run multiple restore operations" +
+ "for the same core");
+ }
+ String name = params.get(NAME);
+ String location = params.get(LOCATION);
+
+ // If no location is provided, assume the snapshot to restore is inside the data directory.
+ if (location == null) {
+ location = core.getDataDir();
+ }
+
+ // If no name is provided, pick the most recent unnamed snapshot folder (the ones in the
+ // snapshot.<timestamp> format), since we allow snapshots to be taken without providing a name.
+ if (name == null) {
+ File[] files = new File(location).listFiles();
+ List<OldBackupDirectory> dirs = new ArrayList<>();
+ for (File f : files) {
+ OldBackupDirectory obd = new OldBackupDirectory(f);
+ if (obd.dir != null) {
+ dirs.add(obd);
+ }
+ }
+ Collections.sort(dirs);
+ if (dirs.size() == 0) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "No backup name specified and none found in " + core.getDataDir());
+ }
+ name = dirs.get(0).dir.getName();
+ } else {
+ //"snapshot." is prefixed by snapshooter
+ name = "snapshot." + name;
+ }
+
+ RestoreCore restoreCore = new RestoreCore(core, location, name);
+ restoreFuture = restoreExecutor.submit(restoreCore);
+ currentRestoreName = name;
+ }
+
+ private NamedList