From 989cf865339175ad041f9f638c08408d59c1be50 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 2 Apr 2015 20:23:03 +0000 Subject: [PATCH 1/9] LUCENE-6386: correct javadocs about temp disk space required for forceMerge(1) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670959 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 ++++ .../org/apache/lucene/index/IndexWriter.java | 17 +++++++++-------- .../lucene/index/TestIndexWriterForceMerge.java | 4 ++-- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0d8221dc884..0468133a323 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -123,6 +123,10 @@ Bug Fixes DocumentsWriterStallControl to prevent hangs during indexing if we miss a .notify/All somewhere (Mike McCandless) +* LUCENE-6386: Correct IndexWriter.forceMerge documentation to state + that up to 3X (X = current index size) spare disk space may be needed + to complete forceMerge(1). (Robert Muir, Shai Erera, Mike McCandless) + Optimizations * LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index a64be82aef2..47267682017 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1547,14 +1547,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { * longer be changed).
   *
   * <p>Note that this requires free space that is proportional
-   * to the size of the index in your Directory (2X if you're
-   * using compound file format). For example, if your index
-   * size is 10 MB then you need an additional 10 MB free for
-   * this to complete (20 MB if you're using compound file
-   * format). This is also affected by the {@link Codec} that
-   * is used to execute the merge, and may result in even a
-   * bigger index. Also, it's best to call {@link #commit()}
-   * afterwards, to allow IndexWriter to free up disk space.
+   * to the size of the index in your Directory: 2X if you are
+   * not using compound file format, and 3X if you are.
+   * For example, if your index size is 10 MB then you need
+   * an additional 20 MB free for this to complete (30 MB if
+   * you're using compound file format). This is also affected
+   * by the {@link Codec} that is used to execute the merge,
+   * and may result in even a bigger index. Also, it's best
+   * to call {@link #commit()} afterwards, to allow IndexWriter
+   * to free up disk space.
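+   *
+   * <p>A minimal sketch of the recommended sequence (assuming an
+   * already-open {@code IndexWriter} named {@code writer}; the
+   * variable name is illustrative):
+   * <pre>
+   * writer.forceMerge(1); // may transiently need up to 3X extra free disk space
+   * writer.commit();      // lets IndexWriter delete the merged-away segments
+   * </pre>
   *
   * <p>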
If some but not all readers re-open while merging * is underway, this will cause {@code > 2X} temporary diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java index 9c219e4cadd..19030cc00a1 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java @@ -199,8 +199,8 @@ public class TestIndexWriterForceMerge extends LuceneTestCase { assertTrue("forceMerge used too much temporary space: starting usage was " + startDiskUsage + " bytes; final usage was " + finalDiskUsage + " bytes; max temp usage was " + maxDiskUsage - + " but should have been " + (3 * maxStartFinalDiskUsage) - + " (= 3X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 3 * maxStartFinalDiskUsage); + + " but should have been at most " + (4 * maxStartFinalDiskUsage) + + " (= 4X starting usage), BEFORE=" + startListing + "AFTER=" + listFiles(dir), maxDiskUsage <= 4 * maxStartFinalDiskUsage); dir.close(); } From 548edc5406aaab07ab633d5c15caec881e1ecca3 Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Thu, 2 Apr 2015 21:17:34 +0000 Subject: [PATCH 2/9] LUCENE-6339: fix test bug (ensure opening nrt reader with applyAllDeletes) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670972 13f79535-47bb-0310-9956-ffa450edef68 --- .../suggest/document/SuggestFieldTest.java | 38 +++++++++++++++---- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/SuggestFieldTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/SuggestFieldTest.java index 4c9d3a4c7dd..1bd442e82f1 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/SuggestFieldTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/SuggestFieldTest.java @@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.StorableField; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.Term; import org.apache.lucene.queries.TermsQuery; @@ -57,11 +56,9 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.TestUtil; import org.junit.After; import org.junit.Before; @@ -158,9 +155,10 @@ public class SuggestFieldTest extends LuceneTestCase { weights[i] = Math.abs(random().nextLong()); document.add(newSuggestField("suggest_field", "abc", weights[i])); iw.addDocument(document); - } - if (rarely()) { - iw.commit(); + + if (usually()) { + iw.commit(); + } } DirectoryReader reader = iw.getReader(); @@ -200,11 +198,15 @@ public class SuggestFieldTest extends LuceneTestCase { } iw.addDocument(document); document.clear(); + + if (usually()) { + iw.commit(); + } } iw.deleteDocuments(new Term("str_field", "delete")); - DirectoryReader reader = DirectoryReader.open(iw, false); + DirectoryReader reader = DirectoryReader.open(iw, 
true); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer); TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", numLive); assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()])); @@ -224,6 +226,10 @@ public class SuggestFieldTest extends LuceneTestCase { document.add(newStringField("str_fld", "deleted", Field.Store.NO)); iw.addDocument(document); document.clear(); + + if (usually()) { + iw.commit(); + } } Filter filter = new QueryWrapperFilter(new TermsQuery("str_fld", new BytesRef("non_existent"))); @@ -249,11 +255,15 @@ public class SuggestFieldTest extends LuceneTestCase { document.add(newStringField("delete", "delete", Field.Store.NO)); iw.addDocument(document); document.clear(); + + if (usually()) { + iw.commit(); + } } iw.deleteDocuments(new Term("delete", "delete")); - DirectoryReader reader = DirectoryReader.open(iw, false); + DirectoryReader reader = DirectoryReader.open(iw, true); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader, analyzer); TopSuggestDocs suggest = indexSearcher.suggest("suggest_field", "abc_", num); assertThat(suggest.totalHits, equalTo(0)); @@ -274,6 +284,10 @@ public class SuggestFieldTest extends LuceneTestCase { document.add(new IntField("weight_fld", i, Field.Store.YES)); iw.addDocument(document); document.clear(); + + if (usually()) { + iw.commit(); + } } iw.deleteDocuments(NumericRangeQuery.newIntRange("weight_fld", 2, null, true, false)); @@ -298,6 +312,10 @@ public class SuggestFieldTest extends LuceneTestCase { document.add(new IntField("filter_int_fld", i, Field.Store.NO)); iw.addDocument(document); document.clear(); + + if (usually()) { + iw.commit(); + } } DirectoryReader reader = iw.getReader(); @@ -542,6 +560,10 @@ public class SuggestFieldTest extends LuceneTestCase { document.add(newSuggestField("suggest_field", suggest, weight)); mappings.put(suggest, weight); iw.addDocument(document); + + if (usually()) { + iw.commit(); + } } DirectoryReader reader = iw.getReader(); From c4d9d6b3f10d4128b820804830abf8c9d44a9d3a Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 2 Apr 2015 22:00:26 +0000 Subject: [PATCH 3/9] LUCENE-6352: Added a new query time join to the join module that uses global ordinals, which is faster for subsequent joins between reopens. 
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670990 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../search/join/BaseGlobalOrdinalScorer.java | 97 +++++ .../search/join/GlobalOrdinalsCollector.java | 114 ++++++ .../search/join/GlobalOrdinalsQuery.java | 245 ++++++++++++ .../GlobalOrdinalsWithScoreCollector.java | 250 ++++++++++++ .../join/GlobalOrdinalsWithScoreQuery.java | 256 ++++++++++++ .../apache/lucene/search/join/JoinUtil.java | 79 ++++ .../lucene/search/join/TestJoinUtil.java | 372 ++++++++++++++---- 8 files changed, 1334 insertions(+), 83 deletions(-) create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/BaseGlobalOrdinalScorer.java create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0468133a323..4ef155e4551 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -45,6 +45,10 @@ New Features faster intersection by avoiding loading positions in certain cases. (Paul Elschot, Robert Muir via Mike McCandless) +* LUCENE-6352: Added a new query time join to the join module that uses + global ordinals, which is faster for subsequent joins between reopens. + (Martijn van Groningen, Adrien Grand) + Optimizations * LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if diff --git a/lucene/join/src/java/org/apache/lucene/search/join/BaseGlobalOrdinalScorer.java b/lucene/join/src/java/org/apache/lucene/search/join/BaseGlobalOrdinalScorer.java new file mode 100644 index 00000000000..4d81d58f92a --- /dev/null +++ b/lucene/join/src/java/org/apache/lucene/search/join/BaseGlobalOrdinalScorer.java @@ -0,0 +1,97 @@ +package org.apache.lucene.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.LongBitSet; + +import java.io.IOException; + +abstract class BaseGlobalOrdinalScorer extends Scorer { + + final LongBitSet foundOrds; + final SortedDocValues values; + final Scorer approximationScorer; + + float score; + + public BaseGlobalOrdinalScorer(Weight weight, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) { + super(weight); + this.foundOrds = foundOrds; + this.values = values; + this.approximationScorer = approximationScorer; + } + + @Override + public float score() throws IOException { + return score; + } + + @Override + public int docID() { + return approximationScorer.docID(); + } + + @Override + public int nextDoc() throws IOException { + return advance(approximationScorer.docID() + 1); + } + + @Override + public TwoPhaseIterator asTwoPhaseIterator() { + final DocIdSetIterator approximation = new DocIdSetIterator() { + @Override + public int docID() { + return approximationScorer.docID(); + } + + @Override + public int nextDoc() throws IOException { + return approximationScorer.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return approximationScorer.advance(target); + } + + @Override + public long cost() { + return approximationScorer.cost(); + } + }; + return createTwoPhaseIterator(approximation); + } + + @Override + public long cost() { + return approximationScorer.cost(); + } + + @Override + public int freq() throws IOException { + return 1; + } + + protected abstract TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation); + +} \ No newline at end of file diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java new file mode 100644 index 00000000000..8a874621a79 --- /dev/null +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java @@ -0,0 +1,114 @@ +package org.apache.lucene.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.LongBitSet; +import org.apache.lucene.util.LongValues; + +import java.io.IOException; + +/** + * A collector that collects all ordinals from a specified field matching the query. + * + * @lucene.experimental + */ +final class GlobalOrdinalsCollector implements Collector { + + final String field; + final LongBitSet collectedOrds; + final MultiDocValues.OrdinalMap ordinalMap; + + GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { + this.field = field; + this.ordinalMap = ordinalMap; + this.collectedOrds = new LongBitSet(valueCount); + } + + public LongBitSet getCollectorOrdinals() { + return collectedOrds; + } + + @Override + public boolean needsScores() { + return false; + } + + @Override + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field); + if (ordinalMap != null) { + LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord); + return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup); + } else { + return new SegmentOrdinalCollector(docTermOrds); + } + } + + final class OrdinalMapCollector implements LeafCollector { + + private final SortedDocValues docTermOrds; + private final LongValues segmentOrdToGlobalOrdLookup; + + OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) { + this.docTermOrds = docTermOrds; + this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup; + } + + @Override + public void collect(int doc) throws IOException { + final long segmentOrd = docTermOrds.getOrd(doc); + if (segmentOrd != -1) { + final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); + collectedOrds.set(globalOrd); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + } + } + + final class SegmentOrdinalCollector implements LeafCollector { + + private final SortedDocValues docTermOrds; + + SegmentOrdinalCollector(SortedDocValues docTermOrds) { + this.docTermOrds = docTermOrds; + } + + @Override + public void collect(int doc) throws IOException { + final long segmentOrd = docTermOrds.getOrd(doc); + if (segmentOrd != -1) { + collectedOrds.set(segmentOrd); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + } + } + +} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java new file mode 100644 index 00000000000..a96881df1a9 --- /dev/null +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java @@ -0,0 +1,245 @@ +package org.apache.lucene.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ComplexExplanation; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LongBitSet; +import org.apache.lucene.util.LongValues; + +import java.io.IOException; +import java.util.Set; + +final class GlobalOrdinalsQuery extends Query { + + // All the ords of matching docs found with OrdinalsCollector. + private final LongBitSet foundOrds; + private final String joinField; + private final MultiDocValues.OrdinalMap globalOrds; + // Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific. + private final Query toQuery; + + // just for hashcode and equals: + private final Query fromQuery; + private final IndexReader indexReader; + + GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) { + this.foundOrds = foundOrds; + this.joinField = joinField; + this.globalOrds = globalOrds; + this.toQuery = toQuery; + this.fromQuery = fromQuery; + this.indexReader = indexReader; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return new W(this, toQuery.createWeight(searcher, false)); + } + + @Override + public void extractTerms(Set terms) { + fromQuery.extractTerms(terms); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + + GlobalOrdinalsQuery that = (GlobalOrdinalsQuery) o; + + if (!fromQuery.equals(that.fromQuery)) return false; + if (!joinField.equals(that.joinField)) return false; + if (!toQuery.equals(that.toQuery)) return false; + if (!indexReader.equals(that.indexReader)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + joinField.hashCode(); + result = 31 * result + toQuery.hashCode(); + result = 31 * result + fromQuery.hashCode(); + result = 31 * result + indexReader.hashCode(); + return result; + } + + @Override + public String toString(String field) { + return "GlobalOrdinalsQuery{" + + "joinField=" + joinField + + '}'; + } + + final class W extends Weight { + + private final Weight approximationWeight; + + private float queryNorm; + private float queryWeight; + + W(Query query, Weight approximationWeight) { + super(query); + this.approximationWeight = approximationWeight; + 
} + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + SortedDocValues values = DocValues.getSorted(context.reader(), joinField); + if (values != null) { + int segmentOrd = values.getOrd(doc); + if (segmentOrd != -1) { + BytesRef joinValue = values.lookupOrd(segmentOrd); + return new ComplexExplanation(true, queryNorm, "Score based on join value " + joinValue.utf8ToString()); + } + } + return new ComplexExplanation(false, 0.0f, "Not a match"); + } + + @Override + public float getValueForNormalization() throws IOException { + queryWeight = getBoost(); + return queryWeight * queryWeight; + } + + @Override + public void normalize(float norm, float topLevelBoost) { + this.queryNorm = norm * topLevelBoost; + queryWeight *= this.queryNorm; + } + + @Override + public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { + SortedDocValues values = DocValues.getSorted(context.reader(), joinField); + if (values == null) { + return null; + } + + Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs); + if (approximationScorer == null) { + return null; + } + if (globalOrds != null) { + return new OrdinalMapScorer(this, queryNorm, foundOrds, values, approximationScorer, globalOrds.getGlobalOrds(context.ord)); + } { + return new SegmentOrdinalScorer(this, queryNorm, foundOrds, values, approximationScorer); + } + } + + } + + final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer { + + final LongValues segmentOrdToGlobalOrdLookup; + + public OrdinalMapScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) { + super(weight, foundOrds, values, approximationScorer); + this.score = score; + this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup; + } + + @Override + public int advance(int target) throws IOException { + for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) { + final long segmentOrd = values.getOrd(docID); + if (segmentOrd != -1) { + final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); + if (foundOrds.get(globalOrd)) { + return docID; + } + } + } + return NO_MORE_DOCS; + } + + @Override + protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) { + return new TwoPhaseIterator(approximation) { + + @Override + public boolean matches() throws IOException { + final long segmentOrd = values.getOrd(approximationScorer.docID()); + if (segmentOrd != -1) { + final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); + if (foundOrds.get(globalOrd)) { + return true; + } + } + return false; + } + }; + } + } + + final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer { + + public SegmentOrdinalScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) { + super(weight, foundOrds, values, approximationScorer); + this.score = score; + } + + @Override + public int advance(int target) throws IOException { + for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) { + final long segmentOrd = values.getOrd(docID); + if (segmentOrd != -1) { + if (foundOrds.get(segmentOrd)) { + return docID; + } + } + } + return NO_MORE_DOCS; + } + + @Override + protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) { + return new TwoPhaseIterator(approximation) { + + 
@Override + public boolean matches() throws IOException { + final long segmentOrd = values.getOrd(approximationScorer.docID()); + if (segmentOrd != -1) { + if (foundOrds.get(segmentOrd)) { + return true; + } + } + return false; + } + }; + } + + } +} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java new file mode 100644 index 00000000000..37ac699280f --- /dev/null +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java @@ -0,0 +1,250 @@ +package org.apache.lucene.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.LongBitSet; +import org.apache.lucene.util.LongValues; + +import java.io.IOException; + +abstract class GlobalOrdinalsWithScoreCollector implements Collector { + + final String field; + final MultiDocValues.OrdinalMap ordinalMap; + final LongBitSet collectedOrds; + protected final Scores scores; + + GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { + if (valueCount > Integer.MAX_VALUE) { + // We simply don't support more than + throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids"); + } + this.field = field; + this.ordinalMap = ordinalMap; + this.collectedOrds = new LongBitSet(valueCount); + this.scores = new Scores(valueCount); + } + + public LongBitSet getCollectorOrdinals() { + return collectedOrds; + } + + public float score(int globalOrdinal) { + return scores.getScore(globalOrdinal); + } + + protected abstract void doScore(int globalOrd, float existingScore, float newScore); + + @Override + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field); + if (ordinalMap != null) { + LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord); + return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup); + } else { + return new SegmentOrdinalCollector(docTermOrds); + } + } + + @Override + public boolean needsScores() { + return true; + } + + final class OrdinalMapCollector implements LeafCollector { + + private final SortedDocValues docTermOrds; + private final LongValues segmentOrdToGlobalOrdLookup; + private Scorer scorer; + + OrdinalMapCollector(SortedDocValues 
docTermOrds, LongValues segmentOrdToGlobalOrdLookup) { + this.docTermOrds = docTermOrds; + this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup; + } + + @Override + public void collect(int doc) throws IOException { + final long segmentOrd = docTermOrds.getOrd(doc); + if (segmentOrd != -1) { + final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd); + collectedOrds.set(globalOrd); + float existingScore = scores.getScore(globalOrd); + float newScore = scorer.score(); + doScore(globalOrd, existingScore, newScore); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + } + + final class SegmentOrdinalCollector implements LeafCollector { + + private final SortedDocValues docTermOrds; + private Scorer scorer; + + SegmentOrdinalCollector(SortedDocValues docTermOrds) { + this.docTermOrds = docTermOrds; + } + + @Override + public void collect(int doc) throws IOException { + final int segmentOrd = docTermOrds.getOrd(doc); + if (segmentOrd != -1) { + collectedOrds.set(segmentOrd); + float existingScore = scores.getScore(segmentOrd); + float newScore = scorer.score(); + doScore(segmentOrd, existingScore, newScore); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + } + + static final class Max extends GlobalOrdinalsWithScoreCollector { + + public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { + super(field, ordinalMap, valueCount); + } + + @Override + protected void doScore(int globalOrd, float existingScore, float newScore) { + scores.setScore(globalOrd, Math.max(existingScore, newScore)); + } + + } + + static final class Sum extends GlobalOrdinalsWithScoreCollector { + + public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { + super(field, ordinalMap, valueCount); + } + + @Override + protected void doScore(int globalOrd, float existingScore, float newScore) { + scores.setScore(globalOrd, existingScore + newScore); + } + + } + + static final class Avg extends GlobalOrdinalsWithScoreCollector { + + private final Occurrences occurrences; + + public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { + super(field, ordinalMap, valueCount); + this.occurrences = new Occurrences(valueCount); + } + + @Override + protected void doScore(int globalOrd, float existingScore, float newScore) { + occurrences.increment(globalOrd); + scores.setScore(globalOrd, existingScore + newScore); + } + + @Override + public float score(int globalOrdinal) { + return scores.getScore(globalOrdinal) / occurrences.getOccurence(globalOrdinal); + } + } + + // Because the global ordinal is directly used as a key to a score we should be somewhat smart about allocation + // the scores array. Most of the times not all docs match so splitting the scores array up in blocks can prevent creation of huge arrays. + // Also working with smaller arrays is supposed to be more gc friendly + // + // At first a hash map implementation would make sense, but in the case that more than half of docs match this becomes more expensive + // then just using an array. + + // Maybe this should become a method parameter? 
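+  //
+  // For example (numbers are illustrative): with arraySize = 4096, global
+  // ordinal 10000 lands in blocks[10000 / 4096][10000 % 4096], i.e.
+  // blocks[2][1808], and blocks[2] is only allocated the first time an
+  // ordinal in that range is scored.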
+ static final int arraySize = 4096; + + static final class Scores { + + final float[][] blocks; + + private Scores(long valueCount) { + long blockSize = valueCount + arraySize - 1; + blocks = new float[(int) ((blockSize) / arraySize)][]; + } + + public void setScore(int globalOrdinal, float score) { + int block = globalOrdinal / arraySize; + int offset = globalOrdinal % arraySize; + float[] scores = blocks[block]; + if (scores == null) { + blocks[block] = scores = new float[arraySize]; + } + scores[offset] = score; + } + + public float getScore(int globalOrdinal) { + int block = globalOrdinal / arraySize; + int offset = globalOrdinal % arraySize; + float[] scores = blocks[block]; + float score; + if (scores != null) { + score = scores[offset]; + } else { + score = 0f; + } + return score; + } + + } + + static final class Occurrences { + + final int[][] blocks; + + private Occurrences(long valueCount) { + long blockSize = valueCount + arraySize - 1; + blocks = new int[(int) (blockSize / arraySize)][]; + } + + public void increment(int globalOrdinal) { + int block = globalOrdinal / arraySize; + int offset = globalOrdinal % arraySize; + int[] occurrences = blocks[block]; + if (occurrences == null) { + blocks[block] = occurrences = new int[arraySize]; + } + occurrences[offset]++; + } + + public int getOccurence(int globalOrdinal) { + int block = globalOrdinal / arraySize; + int offset = globalOrdinal % arraySize; + int[] occurrences = blocks[block]; + return occurrences[offset]; + } + + } + +} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java new file mode 100644 index 00000000000..f9d4df7418d --- /dev/null +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java @@ -0,0 +1,256 @@ +package org.apache.lucene.search.join; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ComplexExplanation; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LongValues; + +import java.io.IOException; +import java.util.Set; + +final class GlobalOrdinalsWithScoreQuery extends Query { + + private final GlobalOrdinalsWithScoreCollector collector; + private final String joinField; + private final MultiDocValues.OrdinalMap globalOrds; + // Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific. + private final Query toQuery; + + // just for hashcode and equals: + private final Query fromQuery; + private final IndexReader indexReader; + + GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) { + this.collector = collector; + this.joinField = joinField; + this.globalOrds = globalOrds; + this.toQuery = toQuery; + this.fromQuery = fromQuery; + this.indexReader = indexReader; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return new W(this, toQuery.createWeight(searcher, false)); + } + + @Override + public void extractTerms(Set terms) { + fromQuery.extractTerms(terms); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + + GlobalOrdinalsWithScoreQuery that = (GlobalOrdinalsWithScoreQuery) o; + + if (!fromQuery.equals(that.fromQuery)) return false; + if (!joinField.equals(that.joinField)) return false; + if (!toQuery.equals(that.toQuery)) return false; + if (!indexReader.equals(that.indexReader)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + joinField.hashCode(); + result = 31 * result + toQuery.hashCode(); + result = 31 * result + fromQuery.hashCode(); + result = 31 * result + indexReader.hashCode(); + return result; + } + + @Override + public String toString(String field) { + return "GlobalOrdinalsQuery{" + + "joinField=" + joinField + + '}'; + } + + final class W extends Weight { + + private final Weight approximationWeight; + + private float queryNorm; + private float queryWeight; + + W(Query query, Weight approximationWeight) { + super(query); + this.approximationWeight = approximationWeight; + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + SortedDocValues values = DocValues.getSorted(context.reader(), joinField); + if (values != null) { + int segmentOrd = values.getOrd(doc); + if (segmentOrd != -1) { + final float score; + if (globalOrds != null) { + long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd); + score = collector.scores.getScore((int) globalOrd); + } 
else { + score = collector.score(segmentOrd); + } + BytesRef joinValue = values.lookupOrd(segmentOrd); + return new ComplexExplanation(true, score, "Score based on join value " + joinValue.utf8ToString()); + } + } + return new ComplexExplanation(false, 0.0f, "Not a match"); + } + + @Override + public float getValueForNormalization() throws IOException { + queryWeight = getBoost(); + return queryWeight * queryWeight; + } + + @Override + public void normalize(float norm, float topLevelBoost) { + this.queryNorm = norm * topLevelBoost; + queryWeight *= this.queryNorm; + } + + @Override + public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { + SortedDocValues values = DocValues.getSorted(context.reader(), joinField); + if (values == null) { + return null; + } + + Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs); + if (approximationScorer == null) { + return null; + } else if (globalOrds != null) { + return new OrdinalMapScorer(this, collector, values, approximationScorer, globalOrds.getGlobalOrds(context.ord)); + } else { + return new SegmentOrdinalScorer(this, collector, values, approximationScorer); + } + } + + } + + final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer { + + final LongValues segmentOrdToGlobalOrdLookup; + final GlobalOrdinalsWithScoreCollector collector; + + public OrdinalMapScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) { + super(weight, collector.getCollectorOrdinals(), values, approximationScorer); + this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup; + this.collector = collector; + } + + @Override + public int advance(int target) throws IOException { + for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) { + final long segmentOrd = values.getOrd(docID); + if (segmentOrd != -1) { + final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); + if (foundOrds.get(globalOrd)) { + score = collector.score((int) globalOrd); + return docID; + } + } + } + return NO_MORE_DOCS; + } + + @Override + protected TwoPhaseIterator createTwoPhaseIterator(DocIdSetIterator approximation) { + return new TwoPhaseIterator(approximation) { + + @Override + public boolean matches() throws IOException { + final long segmentOrd = values.getOrd(approximationScorer.docID()); + if (segmentOrd != -1) { + final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); + if (foundOrds.get(globalOrd)) { + score = collector.score((int) globalOrd); + return true; + } + } + return false; + } + + }; + } + } + + final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer { + + final GlobalOrdinalsWithScoreCollector collector; + + public SegmentOrdinalScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer) { + super(weight, collector.getCollectorOrdinals(), values, approximationScorer); + this.collector = collector; + } + + @Override + public int advance(int target) throws IOException { + for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) { + final int segmentOrd = values.getOrd(docID); + if (segmentOrd != -1) { + if (foundOrds.get(segmentOrd)) { + score = collector.score(segmentOrd); + return docID; + } + } + } + return NO_MORE_DOCS; + } + + @Override + protected TwoPhaseIterator 
createTwoPhaseIterator(DocIdSetIterator approximation) { + return new TwoPhaseIterator(approximation) { + + @Override + public boolean matches() throws IOException { + final int segmentOrd = values.getOrd(approximationScorer.docID()); + if (segmentOrd != -1) { + if (foundOrds.get(segmentOrd)) { + score = collector.score(segmentOrd); + return true; + } + } + return false; + } + }; + } + } +} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java index 44abe7bc464..89ac5089797 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java @@ -17,7 +17,12 @@ package org.apache.lucene.search.join; * limitations under the License. */ +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import java.io.IOException; @@ -90,4 +95,78 @@ public final class JoinUtil { } } + /** + * A query time join using global ordinals over a dedicated join field. + * + * This join has certain restrictions and requirements: + * 1) A document can only refer to one other document. (but can be referred by one or more documents) + * 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field + * that identifies the "from" and "to" side and then the fromQuery and toQuery must take the this into account. + * 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field + * should store the join values as UTF-8 strings. + * 4) An ordinal map must be provided that is created on top of the join field. + * + * @param joinField The {@link org.apache.lucene.index.SortedDocValues} field containing the join values + * @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents. + * @param toQuery The query identifying all documents on the "to" side. + * @param searcher The index searcher used to execute the from query + * @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query + * @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map + * needs to be provided. + * @return a {@link Query} instance that can be used to join documents based on the join field + * @throws IOException If I/O related errors occur + */ + public static Query createJoinQuery(String joinField, + Query fromQuery, + Query toQuery, + IndexSearcher searcher, + ScoreMode scoreMode, + MultiDocValues.OrdinalMap ordinalMap) throws IOException { + IndexReader indexReader = searcher.getIndexReader(); + int numSegments = indexReader.leaves().size(); + final long valueCount; + if (numSegments == 0) { + return new MatchNoDocsQuery(); + } else if (numSegments == 1) { + // No need to use the ordinal map, because there is just one segment. 
+ ordinalMap = null; + LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader(); + SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField); + if (joinSortedDocValues != null) { + valueCount = joinSortedDocValues.getValueCount(); + } else { + return new MatchNoDocsQuery(); + } + } else { + if (ordinalMap == null) { + throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment"); + } + valueCount = ordinalMap.getValueCount(); + } + + Query rewrittenFromQuery = searcher.rewrite(fromQuery); + if (scoreMode == ScoreMode.None) { + GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount); + searcher.search(fromQuery, globalOrdinalsCollector); + return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader); + } + + GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector; + switch (scoreMode) { + case Total: + globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount); + break; + case Max: + globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount); + break; + case Avg: + globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount); + break; + default: + throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode)); + } + searcher.search(fromQuery, globalOrdinalsWithScoreCollector); + return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader); + } + } diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index 46fa0c1979b..bb2fb5ff0ac 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -17,19 +17,6 @@ package org.apache.lucene.search.join; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; @@ -38,27 +25,29 @@ import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.FilterLeafCollector; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; @@ -74,8 +63,22 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; +import org.apache.lucene.util.packed.PackedInts; import org.junit.Test; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + public class TestJoinUtil extends LuceneTestCase { public void testSimple() throws Exception { @@ -169,6 +172,180 @@ public class TestJoinUtil extends LuceneTestCase { dir.close(); } + public void testSimpleOrdinalsJoin() throws Exception { + final String idField = "id"; + final String productIdField = "productId"; + // A field indicating to what type a document belongs, which is then used to distinques between documents during joining. + final String typeField = "type"; + // A single sorted doc values field that holds the join values for all document types. 
+ // Typically during indexing a schema will automatically create this field with the values + final String joinField = idField + productIdField; + + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter( + random(), + dir, + newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE)); + + // 0 + Document doc = new Document(); + doc.add(new TextField(idField, "1", Field.Store.NO)); + doc.add(new TextField(typeField, "product", Field.Store.NO)); + doc.add(new TextField("description", "random text", Field.Store.NO)); + doc.add(new TextField("name", "name1", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("1"))); + w.addDocument(doc); + + // 1 + doc = new Document(); + doc.add(new TextField(productIdField, "1", Field.Store.NO)); + doc.add(new TextField(typeField, "price", Field.Store.NO)); + doc.add(new TextField("price", "10.0", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("1"))); + w.addDocument(doc); + + // 2 + doc = new Document(); + doc.add(new TextField(productIdField, "1", Field.Store.NO)); + doc.add(new TextField(typeField, "price", Field.Store.NO)); + doc.add(new TextField("price", "20.0", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("1"))); + w.addDocument(doc); + + // 3 + doc = new Document(); + doc.add(new TextField(idField, "2", Field.Store.NO)); + doc.add(new TextField(typeField, "product", Field.Store.NO)); + doc.add(new TextField("description", "more random text", Field.Store.NO)); + doc.add(new TextField("name", "name2", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("2"))); + w.addDocument(doc); + w.commit(); + + // 4 + doc = new Document(); + doc.add(new TextField(productIdField, "2", Field.Store.NO)); + doc.add(new TextField(typeField, "price", Field.Store.NO)); + doc.add(new TextField("price", "10.0", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("2"))); + w.addDocument(doc); + + // 5 + doc = new Document(); + doc.add(new TextField(productIdField, "2", Field.Store.NO)); + doc.add(new TextField(typeField, "price", Field.Store.NO)); + doc.add(new TextField("price", "20.0", Field.Store.NO)); + doc.add(new SortedDocValuesField(joinField, new BytesRef("2"))); + w.addDocument(doc); + + IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); + w.close(); + + IndexReader r = indexSearcher.getIndexReader(); + SortedDocValues[] values = new SortedDocValues[r.leaves().size()]; + for (int i = 0; i < values.length; i++) { + LeafReader leafReader = r.leaves().get(i).reader(); + values[i] = DocValues.getSorted(leafReader, joinField); + } + MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build( + r.getCoreCacheKey(), values, PackedInts.DEFAULT + ); + + Query toQuery = new TermQuery(new Term(typeField, "price")); + Query fromQuery = new TermQuery(new Term("name", "name2")); + // Search for product and return prices + Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap); + TopDocs result = indexSearcher.search(joinQuery, 10); + assertEquals(2, result.totalHits); + assertEquals(4, result.scoreDocs[0].doc); + assertEquals(5, result.scoreDocs[1].doc); + + fromQuery = new TermQuery(new Term("name", "name1")); + joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap); + result = indexSearcher.search(joinQuery, 10); + assertEquals(2, 
result.totalHits); + assertEquals(1, result.scoreDocs[0].doc); + assertEquals(2, result.scoreDocs[1].doc); + + // Search for prices and return products + fromQuery = new TermQuery(new Term("price", "20.0")); + toQuery = new TermQuery(new Term(typeField, "product")); + joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap); + result = indexSearcher.search(joinQuery, 10); + assertEquals(2, result.totalHits); + assertEquals(0, result.scoreDocs[0].doc); + assertEquals(3, result.scoreDocs[1].doc); + + indexSearcher.getIndexReader().close(); + dir.close(); + } + + public void testRandomOrdinalsJoin() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter( + random(), + dir, + newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy()) + ); + IndexIterationContext context = createContext(100, w, false, true); + + w.forceMerge(1); + + w.close(); + IndexReader topLevelReader = DirectoryReader.open(dir); + + SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()]; + for (LeafReaderContext leadContext : topLevelReader.leaves()) { + values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field"); + } + context.ordinalMap = MultiDocValues.OrdinalMap.build( + topLevelReader.getCoreCacheKey(), values, PackedInts.DEFAULT + ); + IndexSearcher indexSearcher = newSearcher(topLevelReader); + + int r = random().nextInt(context.randomUniqueValues.length); + boolean from = context.randomFrom[r]; + String randomValue = context.randomUniqueValues[r]; + BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context); + + final Query actualQuery = new TermQuery(new Term("value", randomValue)); + if (VERBOSE) { + System.out.println("actualQuery=" + actualQuery); + } + final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)]; + if (VERBOSE) { + System.out.println("scoreMode=" + scoreMode); + } + + final Query joinQuery; + if (from) { + BooleanQuery fromQuery = new BooleanQuery(); + fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER); + fromQuery.add(actualQuery, BooleanClause.Occur.MUST); + Query toQuery = new TermQuery(new Term("type", "to")); + joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap); + } else { + BooleanQuery fromQuery = new BooleanQuery(); + fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER); + fromQuery.add(actualQuery, BooleanClause.Occur.MUST); + Query toQuery = new TermQuery(new Term("type", "from")); + joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap); + } + if (VERBOSE) { + System.out.println("joinQuery=" + joinQuery); + } + + final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc()); + final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10); + indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector)); + assertBitSet(expectedResult, actualResult, indexSearcher); + TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context); + TopDocs actualTopDocs = topScoreDocCollector.topDocs(); + assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery); + topLevelReader.close(); + dir.close(); + } + 
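
For readers following the new join API, the pattern exercised by these tests distills to the sketch below. It assumes a searcher over an up-to-date reader, a sorted-doc-values join field named "join_field", and "from"/"to" type terms as in the tests; per the createJoinQuery javadoc, the ordinal map may be omitted (null) for a single-segment reader.

    // Build one SortedDocValues per segment and a global OrdinalMap over them;
    // the map can be reused for subsequent joins until the reader is reopened.
    IndexReader reader = searcher.getIndexReader();
    SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
    for (LeafReaderContext leaf : reader.leaves()) {
      values[leaf.ord] = DocValues.getSorted(leaf.reader(), "join_field");
    }
    MultiDocValues.OrdinalMap ordinalMap =
        MultiDocValues.OrdinalMap.build(reader.getCoreCacheKey(), values, PackedInts.DEFAULT);

    // Join documents of type "from" matched by the user query to "to" documents.
    Query fromQuery = new TermQuery(new Term("type", "from"));
    Query toQuery = new TermQuery(new Term("type", "to"));
    Query joinQuery = JoinUtil.createJoinQuery(
        "join_field", fromQuery, toQuery, searcher, ScoreMode.None, ordinalMap);
    TopDocs hits = searcher.search(joinQuery, 10);
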
// TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE public void testOverflowTermsWithScoreCollector() throws Exception { test300spartans(true, ScoreMode.Avg); @@ -218,7 +395,7 @@ public class TestJoinUtil extends LuceneTestCase { TopDocs result = indexSearcher.search(joinQuery, 10); assertEquals(1, result.totalHits); assertEquals(0, result.scoreDocs[0].doc); - + indexSearcher.getIndexReader().close(); dir.close(); @@ -310,7 +487,7 @@ public class TestJoinUtil extends LuceneTestCase { assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive); } } - + @Override public boolean needsScores() { return false; @@ -448,7 +625,7 @@ public class TestJoinUtil extends LuceneTestCase { dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy()) ); - IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument); + IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, false); IndexReader topLevelReader = w.getReader(); w.close(); @@ -485,73 +662,64 @@ public class TestJoinUtil extends LuceneTestCase { // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector... final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc()); final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10); - indexSearcher.search(joinQuery, new Collector() { - - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - final int docBase = context.docBase; - final LeafCollector in = topScoreDocCollector.getLeafCollector(context); - return new FilterLeafCollector(in) { - - @Override - public void collect(int doc) throws IOException { - super.collect(doc); - actualResult.set(doc + docBase); - } - }; - } - - @Override - public boolean needsScores() { - return topScoreDocCollector.needsScores(); - } - }); + indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector)); // Asserting bit set... - if (VERBOSE) { - System.out.println("expected cardinality:" + expectedResult.cardinality()); - DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality()); - for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { - System.out.println(String.format(Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id"))); - } - System.out.println("actual cardinality:" + actualResult.cardinality()); - iterator = new BitSetIterator(actualResult, actualResult.cardinality()); - for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { - System.out.println(String.format(Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id"))); - } - } - assertEquals(expectedResult, actualResult); - + assertBitSet(expectedResult, actualResult, indexSearcher); // Asserting TopDocs... 
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context); TopDocs actualTopDocs = topScoreDocCollector.topDocs(); - assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits); - assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length); - if (scoreMode == ScoreMode.None) { - continue; - } - - assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f); - for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) { - if (VERBOSE) { - System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc); - System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score); - } - assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc); - assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f); - Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc); - assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f); - } + assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery); } topLevelReader.close(); dir.close(); } } - private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument) throws IOException { - return createContext(nDocs, writer, writer, multipleValuesPerDocument); + private void assertBitSet(BitSet expectedResult, BitSet actualResult, IndexSearcher indexSearcher) throws IOException { + if (VERBOSE) { + System.out.println("expected cardinality:" + expectedResult.cardinality()); + DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality()); + for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { + System.out.println(String.format(Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id"))); + } + System.out.println("actual cardinality:" + actualResult.cardinality()); + iterator = new BitSetIterator(actualResult, actualResult.cardinality()); + for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { + System.out.println(String.format(Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id"))); + } + } + assertEquals(expectedResult, actualResult); } - private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument) throws IOException { + private void assertTopDocs(TopDocs expectedTopDocs, TopDocs actualTopDocs, ScoreMode scoreMode, IndexSearcher indexSearcher, Query joinQuery) throws IOException { + assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits); + assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length); + if (scoreMode == ScoreMode.None) { + return; + } + + assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f); + for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) { + if (VERBOSE) { + System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc); + System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score); + } + assertEquals(expectedTopDocs.scoreDocs[i].doc, 
actualTopDocs.scoreDocs[i].doc); + assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f); + Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc); + assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f); + } + } + + private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument, boolean ordinalJoin) throws IOException { + return createContext(nDocs, writer, writer, multipleValuesPerDocument, ordinalJoin); + } + + private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException { + if (globalOrdinalJoin) { + assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument); + } + IndexIterationContext context = new IndexIterationContext(); int numRandomValues = nDocs / 2; context.randomUniqueValues = new String[numRandomValues]; @@ -560,8 +728,8 @@ public class TestJoinUtil extends LuceneTestCase { for (int i = 0; i < numRandomValues; i++) { String uniqueRandomValue; do { - uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random()); -// uniqueRandomValue = _TestUtil.randomSimpleString(random); +// uniqueRandomValue = TestUtil.randomRealisticUnicodeString(random()); + uniqueRandomValue = TestUtil.randomSimpleString(random()); } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue)); // Generate unique values and empty strings aren't allowed. trackSet.add(uniqueRandomValue); @@ -581,15 +749,18 @@ public class TestJoinUtil extends LuceneTestCase { boolean from = context.randomFrom[randomI]; int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1; docs[i] = new RandomDoc(id, numberOfLinkValues, value, from); + if (globalOrdinalJoin) { + document.add(newStringField("type", from ? 
"from" : "to", Field.Store.NO)); + } for (int j = 0; j < numberOfLinkValues; j++) { String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)]; docs[i].linkValues.add(linkValue); if (from) { if (!context.fromDocuments.containsKey(linkValue)) { - context.fromDocuments.put(linkValue, new ArrayList()); + context.fromDocuments.put(linkValue, new ArrayList<>()); } if (!context.randomValueFromDocs.containsKey(value)) { - context.randomValueFromDocs.put(value, new ArrayList()); + context.randomValueFromDocs.put(value, new ArrayList<>()); } context.fromDocuments.get(linkValue).add(docs[i]); @@ -600,12 +771,15 @@ public class TestJoinUtil extends LuceneTestCase { } else { document.add(new SortedDocValuesField("from", new BytesRef(linkValue))); } + if (globalOrdinalJoin) { + document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue))); + } } else { if (!context.toDocuments.containsKey(linkValue)) { - context.toDocuments.put(linkValue, new ArrayList()); + context.toDocuments.put(linkValue, new ArrayList<>()); } if (!context.randomValueToDocs.containsKey(value)) { - context.randomValueToDocs.put(value, new ArrayList()); + context.randomValueToDocs.put(value, new ArrayList<>()); } context.toDocuments.get(linkValue).add(docs[i]); @@ -616,6 +790,9 @@ public class TestJoinUtil extends LuceneTestCase { } else { document.add(new SortedDocValuesField("to", new BytesRef(linkValue))); } + if (globalOrdinalJoin) { + document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue))); + } } } @@ -707,6 +884,9 @@ public class TestJoinUtil extends LuceneTestCase { if (joinScore == null) { joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore()); } + if (VERBOSE) { + System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score()); + } joinScore.addScore(scorer.score()); } @@ -720,7 +900,7 @@ public class TestJoinUtil extends LuceneTestCase { public void setScorer(Scorer scorer) { this.scorer = scorer; } - + @Override public boolean needsScores() { return true; @@ -777,7 +957,7 @@ public class TestJoinUtil extends LuceneTestCase { @Override public void setScorer(Scorer scorer) {} - + @Override public boolean needsScores() { return false; @@ -875,6 +1055,7 @@ public class TestJoinUtil extends LuceneTestCase { Map> fromHitsToJoinScore = new HashMap<>(); Map> toHitsToJoinScore = new HashMap<>(); + MultiDocValues.OrdinalMap ordinalMap; } private static class RandomDoc { @@ -922,4 +1103,29 @@ public class TestJoinUtil extends LuceneTestCase { } + private static class BitSetCollector extends SimpleCollector { + + private final BitSet bitSet; + private int docBase; + + private BitSetCollector(BitSet bitSet) { + this.bitSet = bitSet; + } + + @Override + public void collect(int doc) throws IOException { + bitSet.set(docBase + doc); + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + docBase = context.docBase; + } + + @Override + public boolean needsScores() { + return false; + } + } + } From 79bf72708b94c01691027c9a30364b96a4705ccf Mon Sep 17 00:00:00 2001 From: Varun Thacker Date: Fri, 3 Apr 2015 09:46:54 +0000 Subject: [PATCH 4/9] SOLR-6637: Solr should have a way to restore a core git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671022 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../org/apache/solr/handler/IndexFetcher.java | 101 ++++---- .../solr/handler/OldBackupDirectory.java | 50 ++++ 
.../solr/handler/ReplicationHandler.java | 149 +++++++++-- .../org/apache/solr/handler/RestoreCore.java | 149 +++++++++++ .../org/apache/solr/handler/SnapShooter.java | 35 +-- .../handler/TestReplicationHandlerBackup.java | 77 ++---- .../apache/solr/handler/TestRestoreCore.java | 243 ++++++++++++++++++ 8 files changed, 646 insertions(+), 161 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java create mode 100644 solr/core/src/java/org/apache/solr/handler/RestoreCore.java create mode 100644 solr/core/src/test/org/apache/solr/handler/TestRestoreCore.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index fc5df697f80..215b78e0de3 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -78,6 +78,9 @@ Detailed Change List New Features ---------------------- +* SOLR-6637: Solr should have a way to restore a core from a backed up index. + (Varun Thacker, noble, shalin) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java index 605ba929ec3..1983282ea89 100644 --- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java +++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java @@ -29,6 +29,7 @@ import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.NoSuchFileException; +import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -246,31 +247,31 @@ public class IndexFetcher { } } - boolean fetchLatestIndex(final SolrCore core, boolean forceReplication) throws IOException, InterruptedException { - return fetchLatestIndex(core, forceReplication, false); + boolean fetchLatestIndex(boolean forceReplication) throws IOException, InterruptedException { + return fetchLatestIndex(forceReplication, false); } /** * This command downloads all the necessary files from master to install a index commit point. Only changed files are * downloaded. It also downloads the conf files (if they are modified). 
* - * @param core the SolrCore * @param forceReplication force a replication in all cases * @param forceCoreReload force a core reload in all cases * @return true on success, false if slave is already in sync * @throws IOException if an exception occurs */ - boolean fetchLatestIndex(final SolrCore core, boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException { + boolean fetchLatestIndex(boolean forceReplication, boolean forceCoreReload) throws IOException, InterruptedException { + boolean cleanupDone = false; boolean successfulInstall = false; replicationStartTime = System.currentTimeMillis(); Directory tmpIndexDir = null; - String tmpIndex = null; + String tmpIndex; Directory indexDir = null; - String indexDirPath = null; + String indexDirPath; boolean deleteTmpIdxDir = true; - if (!core.getSolrCoreState().getLastReplicateIndexSuccess()) { + if (!solrCore.getSolrCoreState().getLastReplicateIndexSuccess()) { // if the last replication was not a success, we force a full replication // when we are a bit more confident we may want to try a partial replication // if the error is connection related or something, but we have to be careful @@ -279,7 +280,7 @@ public class IndexFetcher { try { //get the current 'replicateable' index version in the master - NamedList response = null; + NamedList response; try { response = getLatestVersion(); } catch (Exception e) { @@ -290,12 +291,12 @@ public class IndexFetcher { long latestGeneration = (Long) response.get(GENERATION); // TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes) - IndexCommit commit = core.getDeletionPolicy().getLatestCommit(); + IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit(); if (commit == null) { // Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points RefCounted searcherRefCounted = null; try { - searcherRefCounted = core.getNewestSearcher(false); + searcherRefCounted = solrCore.getNewestSearcher(false); if (searcherRefCounted == null) { LOG.warn("No open searcher found - fetch aborted"); return false; @@ -312,15 +313,14 @@ public class IndexFetcher { if (forceReplication && commit.getGeneration() != 0) { // since we won't get the files for an empty index, // we just clear ours and commit - RefCounted iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core); + RefCounted iw = solrCore.getUpdateHandler().getSolrCoreState().getIndexWriter(solrCore); try { iw.get().deleteAll(); } finally { iw.decref(); } - SolrQueryRequest req = new LocalSolrQueryRequest(core, - new ModifiableSolrParams()); - core.getUpdateHandler().commit(new CommitUpdateCommand(req, false)); + SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams()); + solrCore.getUpdateHandler().commit(new CommitUpdateCommand(req, false)); } //there is nothing to be replicated @@ -340,7 +340,9 @@ public class IndexFetcher { // get the list of files first fetchFileList(latestGeneration); // this can happen if the commit point is deleted before we fetch the file list. - if(filesToDownload.isEmpty()) return false; + if (filesToDownload.isEmpty()) { + return false; + } LOG.info("Number of files in latest index in master: " + filesToDownload.size()); // Create the sync service @@ -354,13 +356,13 @@ public class IndexFetcher { || commit.getGeneration() >= latestGeneration || forceReplication; String tmpIdxDirName = "index." 
+ new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date()); - tmpIndex = createTempindexDir(core, tmpIdxDirName); + tmpIndex = Paths.get(solrCore.getDataDir(), tmpIdxDirName).toString(); - tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); + tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType); // cindex dir... - indexDirPath = core.getIndexDir(); - indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); + indexDirPath = solrCore.getIndexDir(); + indexDir = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType); try { @@ -404,7 +406,7 @@ public class IndexFetcher { } finally { writer.decref(); } - solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true); + solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(solrCore, true); } boolean reloadCore = false; @@ -422,7 +424,7 @@ public class IndexFetcher { reloadCore = true; downloadConfFiles(confFilesToDownload, latestGeneration); if (isFullCopyNeeded) { - successfulInstall = modifyIndexProps(tmpIdxDirName); + successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName); deleteTmpIdxDir = false; } else { successfulInstall = moveIndexFiles(tmpIndexDir, indexDir); @@ -433,8 +435,8 @@ public class IndexFetcher { // may be closed if (indexDir != null) { LOG.info("removing old index directory " + indexDir); - core.getDirectoryFactory().doneWithDirectory(indexDir); - core.getDirectoryFactory().remove(indexDir); + solrCore.getDirectoryFactory().doneWithDirectory(indexDir); + solrCore.getDirectoryFactory().remove(indexDir); } } @@ -446,7 +448,7 @@ public class IndexFetcher { } else { terminateAndWaitFsyncService(); if (isFullCopyNeeded) { - successfulInstall = modifyIndexProps(tmpIdxDirName); + successfulInstall = IndexFetcher.modifyIndexProps(solrCore, tmpIdxDirName); deleteTmpIdxDir = false; } else { successfulInstall = moveIndexFiles(tmpIndexDir, indexDir); @@ -458,13 +460,13 @@ public class IndexFetcher { } } finally { if (!isFullCopyNeeded) { - solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core); + solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(solrCore); } } // we must reload the core after we open the IW back up if (successfulInstall && (reloadCore || forceCoreReload)) { - LOG.info("Reloading SolrCore {}", core.getName()); + LOG.info("Reloading SolrCore {}", solrCore.getName()); reloadCore(); } @@ -474,8 +476,8 @@ public class IndexFetcher { // may be closed if (indexDir != null) { LOG.info("removing old index directory " + indexDir); - core.getDirectoryFactory().doneWithDirectory(indexDir); - core.getDirectoryFactory().remove(indexDir); + solrCore.getDirectoryFactory().doneWithDirectory(indexDir); + solrCore.getDirectoryFactory().remove(indexDir); } } if (isFullCopyNeeded) { @@ -486,13 +488,13 @@ public class IndexFetcher { } if (!isFullCopyNeeded && !forceReplication && !successfulInstall) { - cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall); + cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall); cleanupDone = true; // we try with a full copy of the index LOG.warn( "Replication attempt was not successful - trying a full index replication reloadCore={}", reloadCore); - successfulInstall = fetchLatestIndex(core, true, reloadCore); + 
successfulInstall = fetchLatestIndex(true, reloadCore); } replicationStartTime = 0; @@ -505,15 +507,15 @@ public class IndexFetcher { } catch (InterruptedException e) { throw new InterruptedException("Index fetch interrupted"); } catch (Exception e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e); + throw new SolrException(ErrorCode.SERVER_ERROR, "Index fetch failed : ", e); } } finally { if (!cleanupDone) { - cleanup(core, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall); + cleanup(solrCore, tmpIndexDir, indexDir, deleteTmpIdxDir, successfulInstall); } } } - + private void cleanup(final SolrCore core, Directory tmpIndexDir, Directory indexDir, boolean deleteTmpIdxDir, boolean successfulInstall) throws IOException { try { @@ -524,9 +526,9 @@ public class IndexFetcher { LOG.error("caught", e); } } - + core.getUpdateHandler().getSolrCoreState().setLastReplicateIndexSuccess(successfulInstall); - + filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = null; replicationStartTime = 0; dirFileFetcher = null; @@ -545,11 +547,11 @@ public class IndexFetcher { SolrException.log(LOG, "Error removing directory " + tmpIndexDir, e); } } - + if (tmpIndexDir != null) { core.getDirectoryFactory().release(tmpIndexDir); } - + if (indexDir != null) { core.getDirectoryFactory().release(indexDir); } @@ -719,15 +721,6 @@ public class IndexFetcher { } - /** - * All the files are copied to a temp dir first - */ - private String createTempindexDir(SolrCore core, String tmpIdxDirName) { - // TODO: there should probably be a DirectoryFactory#concatPath(parent, name) - // or something - return core.getDataDir() + tmpIdxDirName; - } - private void reloadCore() { final CountDownLatch latch = new CountDownLatch(1); new Thread() { @@ -815,12 +808,12 @@ public class IndexFetcher { || filename.startsWith("segments_") || size < _100K); } - static class CompareResult { + protected static class CompareResult { boolean equal = false; boolean checkSummed = false; } - - private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) { + + protected static CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) { CompareResult compareResult = new CompareResult(); try { try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) { @@ -887,8 +880,8 @@ public class IndexFetcher { } /** - * All the files which are common between master and slave must have same size else we assume they are - * not compatible (stale). + * All the files which are common between master and slave must have same size and same checksum else we assume + * they are not compatible (stale). * * @return true if the index stale and we need to download a fresh copy, false otherwise. * @throws IOException if low level io error @@ -1034,7 +1027,7 @@ public class IndexFetcher { /** * If the index is stale by any chance, load index from a different dir in the data dir. */ - private boolean modifyIndexProps(String tmpIdxDirName) { + protected static boolean modifyIndexProps(SolrCore solrCore, String tmpIdxDirName) { LOG.info("New index installed. Updating index properties... 
index="+tmpIdxDirName); Properties p = new Properties(); Directory dir = null; @@ -1042,7 +1035,7 @@ public class IndexFetcher { dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType); if (slowFileExists(dir, IndexFetcher.INDEX_PROPERTIES)){ final IndexInput input = dir.openInput(IndexFetcher.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE); - + final InputStream is = new PropertiesInputStream(input); try { p.load(new InputStreamReader(is, StandardCharsets.UTF_8)); @@ -1083,7 +1076,7 @@ public class IndexFetcher { } } } - + } private final Map confFileInfoCache = new HashMap<>(); diff --git a/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java b/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java new file mode 100644 index 00000000000..c4b86c7eddf --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java @@ -0,0 +1,50 @@ +package org.apache.solr.handler; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.File;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+class OldBackupDirectory implements Comparable<OldBackupDirectory> {
+  File dir;
+  Date timestamp;
+  private final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$");
+
+  OldBackupDirectory(File dir) {
+    if(dir.isDirectory()) {
+      Matcher m = dirNamePattern.matcher(dir.getName());
+      if(m.find()) {
+        try {
+          this.dir = dir;
+          this.timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).parse(m.group(1));
+        } catch(Exception e) {
+          this.dir = null;
+          this.timestamp = null;
+        }
+      }
+    }
+  }
+  @Override
+  public int compareTo(OldBackupDirectory that) {
+    return that.timestamp.compareTo(this.timestamp);
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
index 3c49448e661..fdae8f90949 100644
--- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
@@ -36,7 +36,9 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -146,6 +148,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
 
   private ReentrantLock indexFetchLock = new ReentrantLock();
 
+  private ExecutorService restoreExecutor = Executors.newSingleThreadExecutor(
+      new DefaultSolrThreadFactory("restoreExecutor"));
+
+  private volatile Future<Boolean> restoreFuture;
+
+  private volatile String currentRestoreName;
+
   private String includeConfFiles;
 
   private NamedList<String> confFileNameAlias = new NamedList<>();
@@ -205,13 +214,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
     // It gives the current 'replicateable' index version
     if (command.equals(CMD_INDEX_VERSION)) {
       IndexCommit commitPoint = indexCommitPoint;  // make a copy so it won't change
-
+
       if (commitPoint == null) {
         // if this handler is 'lazy', we may not have tracked the last commit
         // because our commit listener is registered on inform
         commitPoint = core.getDeletionPolicy().getLatestCommit();
       }
-
+
      if (commitPoint != null && replicationEnabled.get()) {
        //
        // There is a race condition here.  The commit point may be changed / deleted by the time
@@ -235,6 +244,11 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
     } else if (command.equalsIgnoreCase(CMD_BACKUP)) {
       doSnapShoot(new ModifiableSolrParams(solrParams), rsp, req);
       rsp.add(STATUS, OK_STATUS);
+    } else if (command.equalsIgnoreCase(CMD_RESTORE)) {
+      restore(new ModifiableSolrParams(solrParams), rsp, req);
+      rsp.add(STATUS, OK_STATUS);
+    } else if (command.equalsIgnoreCase(CMD_RESTORE_STATUS)) {
+      rsp.add(CMD_RESTORE_STATUS, getRestoreStatus());
     } else if (command.equalsIgnoreCase(CMD_DELETE_BACKUP)) {
       deleteSnapshot(new ModifiableSolrParams(solrParams));
       rsp.add(STATUS, OK_STATUS);
@@ -302,7 +316,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
       throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
     }
 
-    SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME));
+    SnapShooter snapShooter = new SnapShooter(core, params.get(LOCATION), params.get(NAME));
     snapShooter.validateDeleteSnapshot();
     snapShooter.deleteSnapAsync(this);
   }
@@ -361,7 +375,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
       } else {
         currentIndexFetcher = pollingIndexFetcher;
       }
-      return currentIndexFetcher.fetchLatestIndex(core, forceReplication);
+      return currentIndexFetcher.fetchLatestIndex(forceReplication);
     } catch (Exception e) {
       SolrException.log(LOG, "Index fetch failed ", e);
     } finally {
@@ -377,6 +391,72 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
     return indexFetchLock.isLocked();
   }
 
+  private void restore(SolrParams params, SolrQueryResponse rsp, SolrQueryRequest req) {
+    if (restoreFuture != null && !restoreFuture.isDone()) {
+      throw new SolrException(ErrorCode.BAD_REQUEST, "Restore in progress. Cannot run multiple restore operations " +
+          "for the same core");
+    }
+    String name = params.get(NAME);
+    String location = params.get(LOCATION);
+
+    //If location is not provided then assume that the restore index is present inside the data directory.
+    if (location == null) {
+      location = core.getDataDir();
+    }
+
+    //If name is not provided then look for the last unnamed snapshot (the ones with the snapshot.<timestamp>
+    //format), since we allow snapshots to be taken without providing a name. Pick the latest timestamp.
+    if (name == null) {
+      File[] files = new File(location).listFiles();
+      List<OldBackupDirectory> dirs = new ArrayList<>();
+      for (File f : files) {
+        OldBackupDirectory obd = new OldBackupDirectory(f);
+        if (obd.dir != null) {
+          dirs.add(obd);
+        }
+      }
+      Collections.sort(dirs);
+      if (dirs.size() == 0) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "No backup name specified and none found in " + location);
+      }
+      name = dirs.get(0).dir.getName();
+    } else {
+      //"snapshot." is prefixed by SnapShooter
+      name = "snapshot."
+ name; + } + + RestoreCore restoreCore = new RestoreCore(core, location, name); + restoreFuture = restoreExecutor.submit(restoreCore); + currentRestoreName = name; + } + + private NamedList getRestoreStatus() { + NamedList status = new SimpleOrderedMap<>(); + + if (restoreFuture == null) { + status.add(STATUS, "No restore actions in progress"); + return status; + } + + status.add("snapshotName", currentRestoreName); + if (restoreFuture.isDone()) { + try { + boolean success = restoreFuture.get(); + if (success) { + status.add(STATUS, SUCCESS); + } else { + status.add(STATUS, FAILED); + } + } catch (Exception e) { + status.add(STATUS, FAILED); + status.add(EXCEPTION, e.getMessage()); + } + } else { + status.add(STATUS, "In Progress"); + } + return status; + } + private void doSnapShoot(SolrParams params, SolrQueryResponse rsp, SolrQueryRequest req) { try { @@ -391,19 +471,19 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw if (numberToKeep < 1) { numberToKeep = Integer.MAX_VALUE; } - + IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy(); IndexCommit indexCommit = delPolicy.getLatestCommit(); - + if (indexCommit == null) { indexCommit = req.getSearcher().getIndexReader().getIndexCommit(); } - + // small race here before the commit point is saved SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME)); snapShooter.validateCreateSnapshot(); snapShooter.createSnapAsync(indexCommit, numberToKeep, this); - + } catch (Exception e) { LOG.warn("Exception during creating a snapshot", e); rsp.add("exception", e); @@ -420,7 +500,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw private void getFileStream(SolrParams solrParams, SolrQueryResponse rsp) { ModifiableSolrParams rawParams = new ModifiableSolrParams(solrParams); rawParams.set(CommonParams.WT, FILE_STREAM); - + String cfileName = solrParams.get(CONF_FILE_SHORT); if (cfileName != null) { rsp.add(FILE_STREAM, new LocalFsFileStream(solrParams)); @@ -438,7 +518,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw } long gen = Long.parseLong(v); IndexCommit commit = core.getDeletionPolicy().getCommitPoint(gen); - + //System.out.println("ask for files for gen:" + commit.getGeneration() + core.getCoreDescriptor().getCoreContainer().getZkController().getNodeName()); if (commit == null) { rsp.add("status", "invalid index generation"); @@ -456,7 +536,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw Map fileMeta = new HashMap<>(); fileMeta.put(NAME, file); fileMeta.put(SIZE, dir.fileLength(file)); - + try (final IndexInput in = dir.openInput(file, IOContext.READONCE)) { try { long checksum = CodecUtil.retrieveChecksum(in); @@ -465,13 +545,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw LOG.warn("Could not read checksum from index file: " + file, e); } } - + result.add(fileMeta); } } // add the segments_N file - + Map fileMeta = new HashMap<>(); fileMeta.put(NAME, infos.getSegmentsFileName()); fileMeta.put(SIZE, dir.fileLength(infos.getSegmentsFileName())); @@ -487,7 +567,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw result.add(fileMeta); } catch (IOException e) { rsp.add("status", "unable to get file names for given index generation"); - rsp.add("exception", e); + rsp.add(EXCEPTION, e); LOG.error("Unable to get file names for indexCommit generation: " + gen, e); } finally { if (dir != null) { 
@@ -613,7 +693,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw return "ReplicationHandler provides replication of index and configuration files from Master to Slaves"; } - /** + /** * returns the CommitVersionInfo for the current searcher, or null on error. */ private CommitVersionInfo getIndexVersion() { @@ -694,7 +774,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw CommitVersionInfo vInfo = getIndexVersion(); details.add("indexVersion", null == vInfo ? 0 : vInfo.version); details.add(GENERATION, null == vInfo ? 0 : vInfo.generation); - + IndexCommit commit = indexCommitPoint; // make a copy so it won't change if (isMaster) { @@ -832,11 +912,11 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw details.add("master", master); if (slave.size() > 0) details.add("slave", slave); - + NamedList snapshotStats = snapShootDetails; if (snapshotStats != null) details.add(CMD_BACKUP, snapshotStats); - + return details; } @@ -971,12 +1051,12 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw Boolean.toString(enableMaster) + " and slave setting is " + Boolean.toString(enableSlave)); } } - + if (!enableSlave && !enableMaster) { enableMaster = true; master = new NamedList<>(); } - + if (enableMaster) { includeConfFiles = (String) master.get(CONF_FILES); if (includeConfFiles != null && includeConfFiles.trim().length() > 0) { @@ -999,7 +1079,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw if (!replicateOnCommit && ! replicateOnOptimize) { replicateOnCommit = true; } - + // if we only want to replicate on optimize, we need the deletion policy to // save the last optimized commit point. if (replicateOnOptimize) { @@ -1068,7 +1148,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw isMaster = true; } } - + // check master or slave is enabled private boolean isEnabled( NamedList params ){ if( params == null ) return false; @@ -1106,6 +1186,19 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw @Override public void postClose(SolrCore core) {} }); + + core.addCloseHook(new CloseHook() { + @Override + public void preClose(SolrCore core) { + ExecutorUtil.shutdownNowAndAwaitTermination(restoreExecutor); + if (restoreFuture != null) { + restoreFuture.cancel(true); + } + } + + @Override + public void postClose(SolrCore core) {} + }); } /** @@ -1407,6 +1500,14 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw return result; } + private static final String LOCATION = "location"; + + private static final String SUCCESS = "success"; + + private static final String FAILED = "failed"; + + private static final String EXCEPTION = "exception"; + public static final String MASTER_URL = "masterUrl"; public static final String STATUS = "status"; @@ -1417,6 +1518,10 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw public static final String CMD_BACKUP = "backup"; + public static final String CMD_RESTORE = "restore"; + + public static final String CMD_RESTORE_STATUS = "restorestatus"; + public static final String CMD_FETCH_INDEX = "fetchindex"; public static final String CMD_ABORT_FETCH = "abortfetch"; diff --git a/solr/core/src/java/org/apache/solr/handler/RestoreCore.java b/solr/core/src/java/org/apache/solr/handler/RestoreCore.java new file mode 100644 index 00000000000..0d86ca57f89 --- /dev/null +++ 
b/solr/core/src/java/org/apache/solr/handler/RestoreCore.java
@@ -0,0 +1,149 @@
+package org.apache.solr.handler;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Future;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.core.DirectoryFactory;
+import org.apache.solr.core.SolrCore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class RestoreCore implements Callable<Boolean> {
+
+  private static final Logger log = LoggerFactory.getLogger(RestoreCore.class.getName());
+
+  private final String backupName;
+  private final String backupLocation;
+  private final SolrCore core;
+
+  public RestoreCore(SolrCore core, String location, String name) {
+    this.core = core;
+    this.backupLocation = location;
+    this.backupName = name;
+  }
+
+  @Override
+  public Boolean call() throws Exception {
+    return doRestore();
+  }
+
+  private boolean doRestore() throws Exception {
+
+    Path backupPath = Paths.get(backupLocation, backupName);
+    String restoreIndexName = "restore." + backupName;
+    Path restoreIndexPath = Paths.get(core.getDataDir(), restoreIndexName);
+
+    Directory restoreIndexDir = null;
+    Directory indexDir = null;
+    try (Directory backupDir = FSDirectory.open(backupPath)) {
+
+      restoreIndexDir = core.getDirectoryFactory().get(restoreIndexPath.toString(),
+          DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
+
+      //Prefer local copy.
+ indexDir = core.getDirectoryFactory().get(core.getIndexDir(), + DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); + + //Move all files from backupDir to restoreIndexDir + for (String filename : backupDir.listAll()) { + checkInterrupted(); + log.info("Copying over file to restore directory " + filename); + try (IndexInput indexInput = backupDir.openInput(filename, IOContext.READONCE)) { + long checksum = CodecUtil.retrieveChecksum(indexInput); + long length = indexInput.length(); + IndexFetcher.CompareResult compareResult = IndexFetcher.compareFile(indexDir, filename, length, checksum); + if (!compareResult.equal || (!compareResult.checkSummed && (filename.endsWith(".si") + || filename.endsWith(".liv") || filename.startsWith("segments_")))) { + restoreIndexDir.copyFrom(backupDir, filename, filename, IOContext.READONCE); + } else { + //prefer local copy + restoreIndexDir.copyFrom(indexDir, filename, filename, IOContext.READONCE); + } + } catch (Exception e) { + throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e); + } + } + log.debug("Switching directories"); + IndexFetcher.modifyIndexProps(core, restoreIndexName); + + boolean success; + try { + core.getUpdateHandler().newIndexWriter(false); + openNewSearcher(); + success = true; + log.info("Successfully restored to the backup index"); + } catch (Exception e) { + //Rollback to the old index directory. Delete the restore index directory and mark the restore as failed. + log.info("Could not switch to restored index. Rolling back to the current index"); + Directory dir = null; + try { + dir = core.getDirectoryFactory().get(core.getDataDir(), DirectoryFactory.DirContext.META_DATA, + core.getSolrConfig().indexConfig.lockType); + dir.deleteFile(IndexFetcher.INDEX_PROPERTIES); + } finally { + if (dir != null) { + core.getDirectoryFactory().release(dir); + } + } + + core.getDirectoryFactory().doneWithDirectory(restoreIndexDir); + core.getDirectoryFactory().remove(restoreIndexDir); + core.getUpdateHandler().newIndexWriter(false); + openNewSearcher(); + throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e); + } + if (success) { + core.getDirectoryFactory().doneWithDirectory(indexDir); + core.getDirectoryFactory().remove(indexDir); + } + + return true; + } finally { + if (restoreIndexDir != null) { + core.getDirectoryFactory().release(restoreIndexDir); + } + if (indexDir != null) { + core.getDirectoryFactory().release(indexDir); + } + } + } + + private void checkInterrupted() throws InterruptedException { + if (Thread.currentThread().isInterrupted()) { + throw new InterruptedException("Stopping restore process. 
Thread was interrupted."); + } + } + + private void openNewSearcher() throws Exception { + Future[] waitSearcher = new Future[1]; + core.getSearcher(true, false, waitSearcher, true); + if (waitSearcher[0] != null) { + waitSearcher[0].get(); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/SnapShooter.java b/solr/core/src/java/org/apache/solr/handler/SnapShooter.java index 60bf3c80654..e6cb1ac1703 100644 --- a/solr/core/src/java/org/apache/solr/handler/SnapShooter.java +++ b/solr/core/src/java/org/apache/solr/handler/SnapShooter.java @@ -18,6 +18,7 @@ package org.apache.solr.handler; import java.io.File; import java.io.IOException; +import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; @@ -58,10 +59,11 @@ public class SnapShooter { public SnapShooter(SolrCore core, String location, String snapshotName) { solrCore = core; - if (location == null) snapDir = core.getDataDir(); + if (location == null) { + snapDir = core.getDataDir(); + } else { - File base = new File(core.getCoreDescriptor().getInstanceDir()); - snapDir = org.apache.solr.util.FileUtils.resolvePath(base, location).getAbsolutePath(); + snapDir = Paths.get(core.getCoreDescriptor().getInstanceDir()).resolve(location).toAbsolutePath().toString(); } this.snapshotName = snapshotName; @@ -125,7 +127,7 @@ public class SnapShooter { } void createSnapshot(final IndexCommit indexCommit, ReplicationHandler replicationHandler) { - LOG.info("Creating backup snapshot..."); + LOG.info("Creating backup snapshot " + (snapshotName == null ? "" : snapshotName)); NamedList details = new NamedList<>(); details.add("startTime", new Date().toString()); try { @@ -193,31 +195,6 @@ public class SnapShooter { replicationHandler.snapShootDetails = details; } - private class OldBackupDirectory implements Comparable{ - File dir; - Date timestamp; - final Pattern dirNamePattern = Pattern.compile("^snapshot[.](.*)$"); - - OldBackupDirectory(File dir) { - if(dir.isDirectory()) { - Matcher m = dirNamePattern.matcher(dir.getName()); - if(m.find()) { - try { - this.dir = dir; - this.timestamp = new SimpleDateFormat(DATE_FMT, Locale.ROOT).parse(m.group(1)); - } catch(Exception e) { - this.dir = null; - this.timestamp = null; - } - } - } - } - @Override - public int compareTo(OldBackupDirectory that) { - return that.timestamp.compareTo(this.timestamp); - } - } - public static final String DATE_FMT = "yyyyMMddHHmmssSSS"; diff --git a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandlerBackup.java b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandlerBackup.java index b43c3b0d2af..84fca530b23 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandlerBackup.java +++ b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandlerBackup.java @@ -121,7 +121,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase { @Test public void testBackupOnCommit() throws Exception { //Index - int nDocs = indexDocs(); + int nDocs = indexDocs(masterClient); //Confirm if completed CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient); @@ -146,7 +146,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase { } } - private int indexDocs() throws IOException, SolrServerException { + protected static int indexDocs(SolrClient masterClient) throws IOException, SolrServerException { int nDocs = TestUtil.nextInt(random(), 1, 100); masterClient.deleteByQuery("*:*"); for (int i = 0; i < nDocs; i++) 
{ @@ -164,7 +164,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase { @Test public void doTestBackup() throws Exception { - int nDocs = indexDocs(); + int nDocs = indexDocs(masterClient); Path[] snapDir = new Path[5]; //One extra for the backup on commit //First snapshot location @@ -180,18 +180,17 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase { backupNames = new String[4]; } for (int i = 0; i < 4; i++) { - BackupCommand backupCommand; final String backupName = TestUtil.randomSimpleString(random(), 1, 20); if (!namedBackup) { - backupCommand = new BackupCommand(addNumberToKeepInRequest, backupKeepParamName, ReplicationHandler.CMD_BACKUP); + if (addNumberToKeepInRequest) { + runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&" + backupKeepParamName + "=2"); + } else { + runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, ""); + } } else { - backupCommand = new BackupCommand(backupName, ReplicationHandler.CMD_BACKUP); + runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, "&name=" + backupName); backupNames[i] = backupName; } - backupCommand.runCommand(); - if (backupCommand.fail != null) { - fail(backupCommand.fail); - } CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, firstBackupTimestamp); while (!checkBackupStatus.success) { @@ -253,8 +252,7 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase { private void testDeleteNamedBackup(String backupNames[]) throws InterruptedException, IOException { String lastTimestamp = null; for (int i = 0; i < 2; i++) { - BackupCommand deleteBackupCommand = new BackupCommand(backupNames[i], ReplicationHandler.CMD_DELETE_BACKUP); - deleteBackupCommand.runCommand(); + runBackupCommand(masterJetty, ReplicationHandler.CMD_DELETE_BACKUP, "&name=" +backupNames[i]); CheckDeleteBackupStatus checkDeleteBackupStatus = new CheckDeleteBackupStatus(backupNames[i], lastTimestamp); while (true) { boolean success = checkDeleteBackupStatus.fetchStatus(); @@ -267,52 +265,19 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase { } Thread.sleep(200); } - - if (deleteBackupCommand.fail != null) { - fail(deleteBackupCommand.fail); - } } } - private class BackupCommand { - String fail = null; - final boolean addNumberToKeepInRequest; - String backupKeepParamName; - String backupName; - String cmd; - - BackupCommand(boolean addNumberToKeepInRequest, String backupKeepParamName, String command) { - this.addNumberToKeepInRequest = addNumberToKeepInRequest; - this.backupKeepParamName = backupKeepParamName; - this.cmd = command; - } - BackupCommand(String backupName, String command) { - this.backupName = backupName; - addNumberToKeepInRequest = false; - this.cmd = command; - } - - public void runCommand() { - String masterUrl; - if(backupName != null) { - masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd + - "&name=" + backupName; - } else { - masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + "/replication?command=" + cmd + - (addNumberToKeepInRequest ? 
"&" + backupKeepParamName + "=2" : ""); - } - - InputStream stream = null; - try { - URL url = new URL(masterUrl); - stream = url.openStream(); - stream.close(); - } catch (Exception e) { - fail = e.getMessage(); - } finally { - IOUtils.closeQuietly(stream); - } - + public static void runBackupCommand(JettySolrRunner masterJetty, String cmd, String params) throws IOException { + String masterUrl = buildUrl(masterJetty.getLocalPort(), context) + "/" + DEFAULT_TEST_CORENAME + + "/replication?command=" + cmd + params; + InputStream stream = null; + try { + URL url = new URL(masterUrl); + stream = url.openStream(); + stream.close(); + } finally { + IOUtils.closeQuietly(stream); } } @@ -349,6 +314,6 @@ public class TestReplicationHandlerBackup extends SolrJettyTestBase { IOUtils.closeQuietly(stream); } return false; - }; + } } } diff --git a/solr/core/src/test/org/apache/solr/handler/TestRestoreCore.java b/solr/core/src/test/org/apache/solr/handler/TestRestoreCore.java new file mode 100644 index 00000000000..bdedc7c6331 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/TestRestoreCore.java @@ -0,0 +1,243 @@ +package org.apache.solr.handler; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.net.URLEncoder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.regex.Pattern; + +import org.apache.commons.io.IOUtils; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.util.TestUtil; +import org.apache.solr.SolrJettyTestBase; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.util.FileUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +@SolrTestCaseJ4.SuppressSSL // Currently unknown why SSL does not work with this test +public class TestRestoreCore extends SolrJettyTestBase { + + JettySolrRunner masterJetty; + TestReplicationHandler.SolrInstance master = null; + SolrClient masterClient; + + private static final String CONF_DIR = "solr" + File.separator + "collection1" + File.separator + "conf" + + File.separator; + + private static String context = "/solr"; + + private static JettySolrRunner createJetty(TestReplicationHandler.SolrInstance instance) throws Exception { + FileUtils.copyFile(new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"), new File(instance.getHomeDir(), "solr.xml")); + JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), "/solr", 0); + jetty.setDataDir(instance.getDataDir()); + jetty.start(); + return jetty; + } + + private static SolrClient createNewSolrClient(int port) { + try { + // setup the client... + HttpSolrClient client = new HttpSolrClient(buildUrl(port, context) + "/" + DEFAULT_TEST_CORENAME); + client.setConnectionTimeout(15000); + client.setSoTimeout(60000); + client.setDefaultMaxConnectionsPerHost(100); + client.setMaxTotalConnections(100); + return client; + } + catch (Exception ex) { + throw new RuntimeException(ex); + } + } + + + @Before + public void setUp() throws Exception { + super.setUp(); + String configFile = "solrconfig-master.xml"; + + master = new TestReplicationHandler.SolrInstance(createTempDir("solr-instance").toFile(), "master", null); + master.setUp(); + master.copyConfigFile(CONF_DIR + configFile, "solrconfig.xml"); + + masterJetty = createJetty(master); + masterClient = createNewSolrClient(masterJetty.getLocalPort()); + } + + @Override + @After + public void tearDown() throws Exception { + super.tearDown(); + masterClient.close(); + masterClient = null; + masterJetty.stop(); + masterJetty = null; + master = null; + } + + @Test + public void testSimpleRestore() throws Exception { + + int nDocs = TestReplicationHandlerBackup.indexDocs(masterClient); + + String snapshotName; + String location; + String params = ""; + + //Use the default backup location or an externally provided location. 
+ if (random().nextBoolean()) { + location = createTempDir().toFile().getAbsolutePath(); + params += "&location=" + URLEncoder.encode(location, "UTF-8"); + } + + //named snapshot vs default snapshot name + if (random().nextBoolean()) { + snapshotName = TestUtil.randomSimpleString(random(), 1, 5); + params += "&name=" + snapshotName; + } + + TestReplicationHandlerBackup.runBackupCommand(masterJetty, ReplicationHandler.CMD_BACKUP, params); + + CheckBackupStatus checkBackupStatus = new CheckBackupStatus((HttpSolrClient) masterClient, null); + while (!checkBackupStatus.success) { + checkBackupStatus.fetchStatus(); + Thread.sleep(1000); + } + + //Modify existing index before we call restore. + + //Delete a few docs + int numDeletes = TestUtil.nextInt(random(), 1, nDocs); + for(int i=0; i(.*?)"); + + InputStream stream = null; + try { + URL url = new URL(masterUrl); + stream = url.openStream(); + String response = IOUtils.toString(stream, "UTF-8"); + if(pException.matcher(response).find()) { + fail("Failed to complete restore action"); + } + if(response.contains("success")) { + return true; + } else if (response.contains("failed")){ + fail("Restore Failed"); + } + stream.close(); + } finally { + IOUtils.closeQuietly(stream); + } + return false; + } +} From db2faf0c25f94f73cd80fc398b140e8321095dd8 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 3 Apr 2015 13:16:58 +0000 Subject: [PATCH 5/9] LUCENE-6385: add null check to WeightedSpanTermExtractor git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671064 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 +- .../highlight/WeightedSpanTermExtractor.java | 3 + .../lucene/search/highlight/MissesTest.java | 82 +++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 lucene/highlighter/src/test/org/apache/lucene/search/highlight/MissesTest.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4ef155e4551..4dad578f9fb 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -40,10 +40,10 @@ API Changes New Features -* LUCENE-6308: Span queries now share document conjunction/intersection +* LUCENE-6308, LUCENE-6385: Span queries now share document conjunction/intersection code with boolean queries, and use two-phased iterators for faster intersection by avoiding loading positions in certain cases. - (Paul Elschot, Robert Muir via Mike McCandless) + (Paul Elschot, Terry Smith, Robert Muir via Mike McCandless) * LUCENE-6352: Added a new query time join to the join module that uses global ordinals, which is faster for subsequent joins between reopens. 
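The one-line spans null check in the next hunk is what the new MissesTest below pins down: after LUCENE-6308, getSpans() may come back null for a segment in which the (rewritten) span query cannot match, and WeightedSpanTermExtractor previously assumed it was non-null. A minimal sketch of the caller-visible behavior, mirroring the test (the analyzer, field name, and sample text are illustrative only):

    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    Query query = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term("test", "foo")),
        new SpanTermQuery(new Term("test", "bar"))}, 0, true);
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    // Matching text is highlighted as before.
    String hit = highlighter.getBestFragment(analyzer, "test", "this is a foo bar example");
    // Text with no matching spans now simply yields null from getBestFragment,
    // rather than tripping over the null Spans during term extraction.
    String miss = highlighter.getBestFragment(analyzer, "test", "this does not match");
    assert miss == null;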
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 80160ace1c9..1768485c74e 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -306,6 +306,9 @@ public class WeightedSpanTermExtractor { } Bits acceptDocs = context.reader().getLiveDocs(); final Spans spans = q.getSpans(context, acceptDocs, termContexts); + if (spans == null) { + return; + } // collect span positions while (spans.nextDoc() != Spans.NO_MORE_DOCS) { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/MissesTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/MissesTest.java new file mode 100644 index 00000000000..d56f39534dd --- /dev/null +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/MissesTest.java @@ -0,0 +1,82 @@ +package org.apache.lucene.search.highlight; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class MissesTest extends LuceneTestCase {
+  public void testTermQuery() throws IOException, InvalidTokenOffsetsException {
+    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
+      final Query query = new TermQuery(new Term("test", "foo"));
+      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
+      assertEquals("this is a <B>foo</B> bar example",
+          highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
+      assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
+    }
+  }
+
+  public void testBooleanQuery() throws IOException, InvalidTokenOffsetsException {
+    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
+      final BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term("test", "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term("test", "bar")), Occur.MUST);
+      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
+      assertEquals("this is a <B>foo</B> <B>bar</B> example",
+          highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
+      assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
+    }
+  }
+
+  public void testPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
+      final PhraseQuery query = new PhraseQuery();
+      query.add(new Term("test", "foo"));
+      query.add(new Term("test", "bar"));
+      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
+      assertEquals("this is a <B>foo</B> <B>bar</B> example",
+          highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
+      assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
+    }
+  }
+
+  public void testSpanNearQuery() throws IOException, InvalidTokenOffsetsException {
+    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
+      final Query query = new SpanNearQuery(new SpanQuery[] {
+          new SpanTermQuery(new Term("test", "foo")),
+          new SpanTermQuery(new Term("test", "bar"))}, 0, true);
+      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
+      assertEquals("this is a <B>foo</B> <B>bar</B> example",
+          highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
+      assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
+    }
+  }
+}

From 0666344038b338bea281f478a349819a0d351f5e Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Fri, 3 Apr 2015 14:39:33 +0000
Subject: [PATCH 6/9] LUCENE-6388: Optimize SpanNearQuery

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671078 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt | 3 +++
 .../apache/lucene/search/spans/NearSpans.java      | 17 +++++--------
 .../lucene/search/spans/NearSpansOrdered.java      | 24 +++++++-----------
 .../search/spans/NearSpansPayloadOrdered.java      |  8 +++----
 .../lucene/search/spans/SpanNearQuery.java         | 11 +++++----
 5 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4dad578f9fb..0e2ec1129d4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -56,6 +56,9 @@ Optimizations
   faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
   Grand, Mike McCandless)
 
+* LUCENE-6388: Optimize SpanNearQuery when payloads are not present.
+  (Robert Muir)
+
 Bug Fixes
 
 * LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.

diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java
index e2251731992..91382f0814e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java
@@ -29,11 +29,11 @@ import java.util.Objects;
  * Common super class for un/ordered Spans
  */
 abstract class NearSpans extends Spans {
-  SpanNearQuery query;
-  int allowedSlop;
+  final SpanNearQuery query;
+  final int allowedSlop;
 
-  List<Spans> subSpans; // in query order
-  DocIdSetIterator conjunction; // use to move to next doc with all clauses
+  final Spans[] subSpans; // in query order
+  final DocIdSetIterator conjunction; // use to move to next doc with all clauses
 
   boolean atFirstInCurrentDoc;
   boolean oneExhaustedInCurrentDoc; // no more results possible in current doc
 
@@ -44,7 +44,7 @@ abstract class NearSpans extends Spans {
     if (subSpans.size() < 2) {
       throw new IllegalArgumentException("Less than 2 subSpans: " + query);
     }
-    this.subSpans = Objects.requireNonNull(subSpans); // in query order
+    this.subSpans = subSpans.toArray(new Spans[subSpans.size()]); // in query order
     this.conjunction = ConjunctionDISI.intersect(subSpans);
   }
 
@@ -91,13 +91,8 @@ abstract class NearSpans extends Spans {
     return res;
   }
 
-  private Spans[] subSpansArray = null; // init only when needed.
-
   public Spans[] getSubSpans() {
-    if (subSpansArray == null) {
-      subSpansArray = subSpans.toArray(new Spans[subSpans.size()]);
-    }
-    return subSpansArray;
+    return subSpans;
   }
 }

diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
index a77651e8e62..223d29f7ad0 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
@@ -18,12 +18,8 @@ package org.apache.lucene.search.spans;
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Collection;
-import java.util.Set;
 
 /** A Spans that is formed from the ordered subspans of a SpanNearQuery
  * where the subspans do not overlap and have a maximum slop between them,
@@ -146,11 +142,11 @@ public class NearSpansOrdered extends NearSpans {
    * otherwise at least one is exhausted in the current doc.
   */
  private boolean stretchToOrder() throws IOException {
-    Spans prevSpans = subSpans.get(0);
+    Spans prevSpans = subSpans[0];
     assert prevSpans.startPosition() != NO_MORE_POSITIONS : "prevSpans no start position "+prevSpans;
     assert prevSpans.endPosition() != NO_MORE_POSITIONS;
-    for (int i = 1; i < subSpans.size(); i++) {
-      Spans spans = subSpans.get(i);
+    for (int i = 1; i < subSpans.length; i++) {
+      Spans spans = subSpans[i];
       assert spans.startPosition() != NO_MORE_POSITIONS;
       assert spans.endPosition() != NO_MORE_POSITIONS;
 
@@ -169,15 +165,14 @@ public class NearSpansOrdered extends NearSpans {
    * on all subSpans, except the last one, in reverse order.
    */
   protected boolean shrinkToAfterShortestMatch() throws IOException {
-    Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
+    Spans lastSubSpans = subSpans[subSpans.length - 1];
     matchStart = lastSubSpans.startPosition();
     matchEnd = lastSubSpans.endPosition();
 
     int matchSlop = 0;
     int lastStart = matchStart;
-    int lastEnd = matchEnd;
-    for (int i = subSpans.size() - 2; i >= 0; i--) {
-      Spans prevSpans = subSpans.get(i);
+    for (int i = subSpans.length - 2; i >= 0; i--) {
+      Spans prevSpans = subSpans[i];
 
       int prevStart = prevSpans.startPosition();
       int prevEnd = prevSpans.endPosition();
@@ -206,7 +201,6 @@ public class NearSpansOrdered extends NearSpans {
        */
       matchStart = prevStart;
       lastStart = prevStart;
-      lastEnd = prevEnd;
     }
 
     boolean match = matchSlop <= allowedSlop;
@@ -224,16 +218,14 @@ public class NearSpansOrdered extends NearSpans {
     return atFirstInCurrentDoc ? -1 : matchEnd;
   }
 
-  /** Throws an UnsupportedOperationException */
   @Override
   public Collection<byte[]> getPayload() throws IOException {
-    throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
+    return null;
   }
 
-  /** Throws an UnsupportedOperationException */
   @Override
   public boolean isPayloadAvailable() {
-    throw new UnsupportedOperationException("Use NearSpansPayloadOrdered instead");
+    return false;
   }
 
   @Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java
index b2ea4e85679..163aef80b40 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansPayloadOrdered.java
@@ -47,7 +47,7 @@ public class NearSpansPayloadOrdered extends NearSpansOrdered {
    * Also collect the payloads.
   */
  protected boolean shrinkToAfterShortestMatch() throws IOException {
-    Spans lastSubSpans = subSpans.get(subSpans.size() - 1);
+    Spans lastSubSpans = subSpans[subSpans.length - 1];
     matchStart = lastSubSpans.startPosition();
     matchEnd = lastSubSpans.endPosition();
@@ -62,9 +62,8 @@
 
     int matchSlop = 0;
     int lastStart = matchStart;
-    int lastEnd = matchEnd;
-    for (int i = subSpans.size() - 2; i >= 0; i--) {
-      Spans prevSpans = subSpans.get(i);
+    for (int i = subSpans.length - 2; i >= 0; i--) {
+      Spans prevSpans = subSpans[i];
 
       if (prevSpans.isPayloadAvailable()) {
         Collection<byte[]> payload = prevSpans.getPayload();
@@ -112,7 +111,6 @@
        */
       matchStart = prevStart;
       lastStart = prevStart;
-      lastEnd = prevEnd;
     }
 
     boolean match = matchSlop <= allowedSlop;
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
index 71b49014133..dbf7cdff5d2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
@@ -18,19 +18,17 @@ package org.apache.lucene.search.spans;
  */
 
 import java.io.IOException;
-
-
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
-
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ToStringUtils;
@@ -131,10 +129,15 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
         return null; // all required
       }
     }
+
+    Terms terms = context.reader().terms(field);
+    if (terms == null) {
+      return null; // field does not exist
+    }
 
     // all NearSpans require at least two subSpans
     return (! inOrder) ? new NearSpansUnordered(this, subSpans)
-          : collectPayloads ? new NearSpansPayloadOrdered(this, subSpans)
+          : collectPayloads && terms.hasPayloads() ? new NearSpansPayloadOrdered(this, subSpans)
           : new NearSpansOrdered(this, subSpans);
   }

From d5749d6eccfcf4ddcb57e5e48b70088cf6a277fb Mon Sep 17 00:00:00 2001
From: Shawn Heisey
Date: Fri, 3 Apr 2015 15:10:00 +0000
Subject: [PATCH 7/9] SOLR-6865: Upgrade HttpClient/Core/Mime to 4.4.1.
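
The *.jar.sha1 license files below pin the exact dependency jars. As an
aside (not part of this patch or the build), such a checksum can be
recomputed locally with plain Java; the jar path argument here is
hypothetical:

    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.security.MessageDigest;

    public class Sha1OfJar {
      public static void main(String[] args) throws Exception {
        // args[0] = path to a downloaded jar, e.g. httpclient-4.4.1.jar
        byte[] jar = Files.readAllBytes(Paths.get(args[0]));
        StringBuilder hex = new StringBuilder();
        for (byte b : MessageDigest.getInstance("SHA-1").digest(jar)) {
          hex.append(String.format("%02x", b)); // lower-case hex, as in .sha1 files
        }
        System.out.println(hex); // compare with the checked-in .jar.sha1 value
      }
    }
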
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671085 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/ivy-versions.properties | 6 +++--- lucene/licenses/httpclient-4.3.1.jar.sha1 | 1 - lucene/licenses/httpclient-4.4.1.jar.sha1 | 1 + lucene/licenses/httpcore-4.3.jar.sha1 | 1 - lucene/licenses/httpcore-4.4.1.jar.sha1 | 1 + solr/CHANGES.txt | 5 +++++ solr/licenses/httpclient-4.3.1.jar.sha1 | 1 - solr/licenses/httpclient-4.4.1.jar.sha1 | 1 + solr/licenses/httpcore-4.3.jar.sha1 | 1 - solr/licenses/httpcore-4.4.1.jar.sha1 | 1 + solr/licenses/httpmime-4.3.1.jar.sha1 | 1 - solr/licenses/httpmime-4.4.1.jar.sha1 | 1 + 12 files changed, 13 insertions(+), 8 deletions(-) delete mode 100644 lucene/licenses/httpclient-4.3.1.jar.sha1 create mode 100644 lucene/licenses/httpclient-4.4.1.jar.sha1 delete mode 100644 lucene/licenses/httpcore-4.3.jar.sha1 create mode 100644 lucene/licenses/httpcore-4.4.1.jar.sha1 delete mode 100644 solr/licenses/httpclient-4.3.1.jar.sha1 create mode 100644 solr/licenses/httpclient-4.4.1.jar.sha1 delete mode 100644 solr/licenses/httpcore-4.3.jar.sha1 create mode 100644 solr/licenses/httpcore-4.4.1.jar.sha1 delete mode 100644 solr/licenses/httpmime-4.3.1.jar.sha1 create mode 100644 solr/licenses/httpmime-4.4.1.jar.sha1 diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index ba0bd01a58d..bcc0a11e971 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -116,9 +116,9 @@ org.apache.hadoop.version = 2.6.0 # The httpcore version is often different from the httpclient and httpmime versions, # so the httpcore version value should not share the same symbolic name with them. -/org.apache.httpcomponents/httpclient = 4.3.1 -/org.apache.httpcomponents/httpcore = 4.3 -/org.apache.httpcomponents/httpmime = 4.3.1 +/org.apache.httpcomponents/httpclient = 4.4.1 +/org.apache.httpcomponents/httpcore = 4.4.1 +/org.apache.httpcomponents/httpmime = 4.4.1 /org.apache.ivy/ivy = 2.3.0 diff --git a/lucene/licenses/httpclient-4.3.1.jar.sha1 b/lucene/licenses/httpclient-4.3.1.jar.sha1 deleted file mode 100644 index 0e6a5bb76f7..00000000000 --- a/lucene/licenses/httpclient-4.3.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0ec13f6423eb6d5858e229939a2bc118473ef94c diff --git a/lucene/licenses/httpclient-4.4.1.jar.sha1 b/lucene/licenses/httpclient-4.4.1.jar.sha1 new file mode 100644 index 00000000000..de87c5c21dc --- /dev/null +++ b/lucene/licenses/httpclient-4.4.1.jar.sha1 @@ -0,0 +1 @@ +016d0bc512222f1253ee6b64d389c84e22f697f0 diff --git a/lucene/licenses/httpcore-4.3.jar.sha1 b/lucene/licenses/httpcore-4.3.jar.sha1 deleted file mode 100644 index 07ffe5f8cb3..00000000000 --- a/lucene/licenses/httpcore-4.3.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -11393498b38e9695d0850cac26fde5613ae268b9 diff --git a/lucene/licenses/httpcore-4.4.1.jar.sha1 b/lucene/licenses/httpcore-4.4.1.jar.sha1 new file mode 100644 index 00000000000..0325419ee7f --- /dev/null +++ b/lucene/licenses/httpcore-4.4.1.jar.sha1 @@ -0,0 +1 @@ +f5aa318bda4c6c8d688c9d00b90681dcd82ce636 diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 215b78e0de3..e962fffa1d4 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -93,6 +93,11 @@ Optimizations * SOLR-7324: IndexFetcher does not need to call isIndexStale if full copy is already needed (Stephan Lagraulet via Varun Thacker) +Other Changes +---------------------- + +* SOLR-6865: Upgrade HttpClient to 4.4.1 (Shawn Heisey) + ================== 5.1.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes 
in this release diff --git a/solr/licenses/httpclient-4.3.1.jar.sha1 b/solr/licenses/httpclient-4.3.1.jar.sha1 deleted file mode 100644 index 0e6a5bb76f7..00000000000 --- a/solr/licenses/httpclient-4.3.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0ec13f6423eb6d5858e229939a2bc118473ef94c diff --git a/solr/licenses/httpclient-4.4.1.jar.sha1 b/solr/licenses/httpclient-4.4.1.jar.sha1 new file mode 100644 index 00000000000..de87c5c21dc --- /dev/null +++ b/solr/licenses/httpclient-4.4.1.jar.sha1 @@ -0,0 +1 @@ +016d0bc512222f1253ee6b64d389c84e22f697f0 diff --git a/solr/licenses/httpcore-4.3.jar.sha1 b/solr/licenses/httpcore-4.3.jar.sha1 deleted file mode 100644 index 07ffe5f8cb3..00000000000 --- a/solr/licenses/httpcore-4.3.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -11393498b38e9695d0850cac26fde5613ae268b9 diff --git a/solr/licenses/httpcore-4.4.1.jar.sha1 b/solr/licenses/httpcore-4.4.1.jar.sha1 new file mode 100644 index 00000000000..0325419ee7f --- /dev/null +++ b/solr/licenses/httpcore-4.4.1.jar.sha1 @@ -0,0 +1 @@ +f5aa318bda4c6c8d688c9d00b90681dcd82ce636 diff --git a/solr/licenses/httpmime-4.3.1.jar.sha1 b/solr/licenses/httpmime-4.3.1.jar.sha1 deleted file mode 100644 index fb256890c78..00000000000 --- a/solr/licenses/httpmime-4.3.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -f7899276dddd01d8a42ecfe27e7031fcf9824422 diff --git a/solr/licenses/httpmime-4.4.1.jar.sha1 b/solr/licenses/httpmime-4.4.1.jar.sha1 new file mode 100644 index 00000000000..b82d4524c9c --- /dev/null +++ b/solr/licenses/httpmime-4.4.1.jar.sha1 @@ -0,0 +1 @@ +2f8757f5ac5e38f46c794e5229d1f3c522e9b1df From dcdb496bef49e5f769f5a1ed2c5b858f01c1669d Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 3 Apr 2015 18:38:27 +0000 Subject: [PATCH 8/9] LUCENE-6391: Give SpanScorer two-phase iterator support git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671123 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 +- .../search/payloads/PayloadNearQuery.java | 14 ++-- .../search/payloads/PayloadTermQuery.java | 16 ++-- .../search/spans/SpanPositionCheckQuery.java | 4 +- .../lucene/search/spans/SpanScorer.java | 82 ++++++++++++------- .../search/spans/JustCompileSearchSpans.java | 7 +- 6 files changed, 75 insertions(+), 51 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0e2ec1129d4..3167a5c30c7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -40,7 +40,8 @@ API Changes New Features -* LUCENE-6308, LUCENE-6385: Span queries now share document conjunction/intersection +* LUCENE-6308, LUCENE-6385, LUCENE-6391: Span queries now share + document conjunction/intersection code with boolean queries, and use two-phased iterators for faster intersection by avoiding loading positions in certain cases. 
(Paul Elschot, Terry Smith, Robert Muir via Mike McCandless) diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index c7007e17712..6658b0d0263 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -232,8 +232,8 @@ public class PayloadNearQuery extends SpanNearQuery { scratch.bytes = thePayload; scratch.offset = 0; scratch.length = thePayload.length; - payloadScore = function.currentScore(doc, fieldName, start, end, - payloadsSeen, payloadScore, docScorer.computePayloadFactor(doc, + payloadScore = function.currentScore(docID(), fieldName, start, end, + payloadsSeen, payloadScore, docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), scratch)); ++payloadsSeen; } @@ -241,7 +241,7 @@ public class PayloadNearQuery extends SpanNearQuery { // @Override - protected boolean setFreqCurrentDoc() throws IOException { + protected void setFreqCurrentDoc() throws IOException { freq = 0.0f; payloadScore = 0; payloadsSeen = 0; @@ -255,14 +255,12 @@ public class PayloadNearQuery extends SpanNearQuery { getPayloads(spansArr); startPos = spans.nextStartPosition(); } while (startPos != Spans.NO_MORE_POSITIONS); - return true; } @Override - public float score() throws IOException { - - return super.score() - * function.docScore(doc, fieldName, payloadsSeen, payloadScore); + public float scoreCurrentDoc() throws IOException { + return super.scoreCurrentDoc() + * function.docScore(docID(), fieldName, payloadsSeen, payloadScore); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index 977ed262c46..2419f03f92e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -99,7 +99,7 @@ public class PayloadTermQuery extends SpanTermQuery { } @Override - protected boolean setFreqCurrentDoc() throws IOException { + protected void setFreqCurrentDoc() throws IOException { freq = 0.0f; numMatches = 0; payloadScore = 0; @@ -115,7 +115,6 @@ public class PayloadTermQuery extends SpanTermQuery { startPos = spans.nextStartPosition(); } while (startPos != Spans.NO_MORE_POSITIONS); - return freq != 0; } protected void processPayload(Similarity similarity) throws IOException { @@ -123,11 +122,11 @@ public class PayloadTermQuery extends SpanTermQuery { final PostingsEnum postings = termSpans.getPostings(); payload = postings.getPayload(); if (payload != null) { - payloadScore = function.currentScore(doc, term.field(), + payloadScore = function.currentScore(docID(), term.field(), spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, - docScorer.computePayloadFactor(doc, spans.startPosition(), spans.endPosition(), payload)); + docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload)); } else { - payloadScore = function.currentScore(doc, term.field(), + payloadScore = function.currentScore(docID(), term.field(), spans.startPosition(), spans.endPosition(), payloadsSeen, payloadScore, 1F); } payloadsSeen++; @@ -143,8 +142,7 @@ public class PayloadTermQuery extends SpanTermQuery { * @throws IOException if there is a low-level I/O error */ @Override - public float score() throws 
IOException { - + public float scoreCurrentDoc() throws IOException { return includeSpanScore ? getSpanScore() * getPayloadScore() : getPayloadScore(); } @@ -160,7 +158,7 @@ public class PayloadTermQuery extends SpanTermQuery { * @see #score() */ protected float getSpanScore() throws IOException { - return super.score(); + return super.scoreCurrentDoc(); } /** @@ -170,7 +168,7 @@ public class PayloadTermQuery extends SpanTermQuery { * {@link PayloadFunction#docScore(int, String, int, float)} */ protected float getPayloadScore() { - return function.docScore(doc, term.field(), payloadsSeen, payloadScore); + return function.docScore(docID(), term.field(), payloadsSeen, payloadScore); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 2df1e5e3ff3..b65a4340da4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -146,7 +146,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea startPos = in.nextStartPosition(); assert startPos != NO_MORE_POSITIONS; for (;;) { - switch(acceptPosition(this)) { + switch(acceptPosition(in)) { case YES: atFirstInCurrentDoc = true; return in.docID(); @@ -180,7 +180,7 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea if (startPos == NO_MORE_POSITIONS) { return NO_MORE_POSITIONS; } - switch(acceptPosition(this)) { + switch(acceptPosition(in)) { case YES: return startPos; case NO: diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 3c9a90eb7ac..7b8dea3c724 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -21,48 +21,58 @@ import java.io.IOException; import java.util.Objects; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.similarities.Similarity; /** * Public for extension only. 
*/ public class SpanScorer extends Scorer { - protected Spans spans; - - protected int doc; - protected float freq; - protected int numMatches; + /** underlying spans we are scoring from */ + protected final Spans spans; + /** similarity used in default score impl */ protected final Similarity.SimScorer docScorer; - protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) - throws IOException { + /** accumulated sloppy freq (computed in setFreqCurrentDoc) */ + protected float freq; + /** number of matches (computed in setFreqCurrentDoc) */ + protected int numMatches; + + private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for + + protected SpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException { super(weight); this.docScorer = Objects.requireNonNull(docScorer); this.spans = Objects.requireNonNull(spans); - this.doc = -1; } @Override - public int nextDoc() throws IOException { - int prevDoc = doc; - doc = spans.nextDoc(); - if (doc != NO_MORE_DOCS) { - setFreqCurrentDoc(); - } - return doc; + public final int nextDoc() throws IOException { + return spans.nextDoc(); } @Override - public int advance(int target) throws IOException { - int prevDoc = doc; - doc = spans.advance(target); - if (doc != NO_MORE_DOCS) { + public final int advance(int target) throws IOException { + return spans.advance(target); + } + + /** + * Ensure setFreqCurrentDoc is called, if not already called for the current doc. + */ + private final void ensureFreq() throws IOException { + int currentDoc = spans.docID(); + if (lastScoredDoc != currentDoc) { setFreqCurrentDoc(); + lastScoredDoc = currentDoc; } - return doc; } - protected boolean setFreqCurrentDoc() throws IOException { + /** + * Sets {@link #freq} and {@link #numMatches} for the current document. + *
<p>
+ * This will be called at most once per document. + */ + protected void setFreqCurrentDoc() throws IOException { freq = 0.0f; numMatches = 0; @@ -90,34 +100,46 @@ public class SpanScorer extends Scorer { assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans; assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans; - - return true; + } + + /** + * Score the current doc. The default implementation scores the doc + * with the similarity using the slop-adjusted {@link #freq}. + */ + protected float scoreCurrentDoc() throws IOException { + return docScorer.score(spans.docID(), freq); } @Override - public int docID() { return doc; } + public final int docID() { return spans.docID(); } @Override - public float score() throws IOException { - float s = docScorer.score(doc, freq); - return s; + public final float score() throws IOException { + ensureFreq(); + return scoreCurrentDoc(); } @Override - public int freq() throws IOException { + public final int freq() throws IOException { + ensureFreq(); return numMatches; } /** Returns the intermediate "sloppy freq" adjusted for edit distance * @lucene.internal */ // only public so .payloads can see it. - public float sloppyFreq() throws IOException { + public final float sloppyFreq() throws IOException { + ensureFreq(); return freq; } @Override - public long cost() { + public final long cost() { return spans.cost(); } + @Override + public final TwoPhaseIterator asTwoPhaseIterator() { + return spans.asTwoPhaseIterator(); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 1fec61daf28..5c1b6173825 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -162,7 +162,12 @@ final class JustCompileSearchSpans { } @Override - protected boolean setFreqCurrentDoc() { + protected void setFreqCurrentDoc() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + protected float scoreCurrentDoc() throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } } From 7ee772616fcf9ce5476d1d1bfc41f35e0f124d1f Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 3 Apr 2015 19:43:41 +0000 Subject: [PATCH 9/9] LUCENE-6393: add equivalence tests for SpanFirstQuery. 
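
Each new test asserts a containment relation between two queries:
assertSubsetOf(q1, q2) verifies that every document matched by q1 is also
matched by q2. A minimal standalone sketch of that property (doc ids
invented purely for illustration):

    import java.util.Arrays;
    import java.util.Set;
    import java.util.TreeSet;

    public class SubsetSketch {
      public static void main(String[] args) {
        // SpanFirstQuery(A, n) can only match documents that TermQuery(A)
        // matches, so its hit set must be contained in the term query's.
        Set<Integer> spanFirstHits = new TreeSet<>(Arrays.asList(2, 7));
        Set<Integer> termHits = new TreeSet<>(Arrays.asList(2, 5, 7, 9));
        System.out.println(termHits.containsAll(spanFirstHits)); // prints true
      }
    }
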
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671137 13f79535-47bb-0310-9956-ffa450edef68
---
 .../spans/TestSpanSearchEquivalence.java           | 119 ++++++++++++++++++
 .../search/SearchEquivalenceTestBase.java          |   3 +
 2 files changed, 122 insertions(+)

diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java
index 94c5db302e7..3fc51012ff6 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java
@@ -21,6 +21,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SearchEquivalenceTestBase;
 import org.apache.lucene.search.TermQuery;
 
@@ -106,4 +107,122 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
     SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false);
     assertSubsetOf(q1, q2);
   }
+
+  /** SpanNearQuery([A B], N, false) ⊆ SpanNearQuery([A B], N+1, false) */
+  public void testSpanNearIncreasingSloppiness() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B C], N, false) ⊆ SpanNearQuery([A B C], N+1, false) */
+  public void testSpanNearIncreasingSloppiness3() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    Term t3 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, false);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B], N, true) ⊆ SpanNearQuery([A B], N+1, true) */
+  public void testSpanNearIncreasingOrderedSloppiness() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanNearQuery([A B C], N, true) ⊆ SpanNearQuery([A B C], N+1, true) */
+  public void testSpanNearIncreasingOrderedSloppiness3() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    Term t3 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) };
+    for (int i = 0; i < 10; i++) {
+      SpanNearQuery q1 = new SpanNearQuery(subquery, i, true);
+      SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery(A, N) ⊆ TermQuery(A) */
+  public void testSpanFirstTerm() throws Exception {
+    Term t1 = randomTerm();
+    for (int i = 0; i < 10; i++) {
+      Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i);
+      Query q2 = new TermQuery(t1);
+      assertSubsetOf(q1, q2);
+    }
+  }
+
+  /** SpanFirstQuery(A, N) ⊆ SpanFirstQuery(A, N+1) */
+  public
void testSpanFirstTermIncreasing() throws Exception { + Term t1 = randomTerm(); + for (int i = 0; i < 10; i++) { + Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i); + Query q2 = new SpanFirstQuery(new SpanTermQuery(t1), i+1); + assertSubsetOf(q1, q2); + } + } + + /** SpanFirstQuery(A, ∞) = TermQuery(A) */ + public void testSpanFirstTermEverything() throws Exception { + Term t1 = randomTerm(); + Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), Integer.MAX_VALUE); + Query q2 = new TermQuery(t1); + assertSameSet(q1, q2); + } + + /** SpanFirstQuery([A B], N) ⊆ SpanNearQuery([A B]) */ + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393") + public void testSpanFirstNear() throws Exception { + Term t1 = randomTerm(); + Term t2 = randomTerm(); + SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; + SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); + for (int i = 0; i < 10; i++) { + Query q1 = new SpanFirstQuery(nearQuery, i); + Query q2 = nearQuery; + assertSubsetOf(q1, q2); + } + } + + /** SpanFirstQuery([A B], N) ⊆ SpanFirstQuery([A B], N+1) */ + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393") + public void testSpanFirstNearIncreasing() throws Exception { + Term t1 = randomTerm(); + Term t2 = randomTerm(); + SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; + SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); + for (int i = 0; i < 10; i++) { + Query q1 = new SpanFirstQuery(nearQuery, i); + Query q2 = new SpanFirstQuery(nearQuery, i+1); + assertSubsetOf(q1, q2); + } + } + + /** SpanFirstQuery([A B], ∞) = SpanNearQuery([A B]) */ + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393") + public void testSpanFirstNearEverything() throws Exception { + Term t1 = randomTerm(); + Term t2 = randomTerm(); + SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; + SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); + Query q1 = new SpanFirstQuery(nearQuery, Integer.MAX_VALUE); + Query q2 = nearQuery; + assertSameSet(q1, q2); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java index 40ea56e3f60..c756d787852 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java @@ -263,6 +263,9 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase { * Both queries will be filtered by filter */ protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception { + QueryUtils.check(q1); + QueryUtils.check(q2); + if (filter != null) { q1 = new FilteredQuery(q1, filter); q2 = new FilteredQuery(q2, filter);