diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index bb3e19d24b2..b781234d632 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -15,6 +15,11 @@ Changes in Runtime Behavior ======================= Lucene 7.1.0 ======================= +New Features + +* LUCENE-7621: Add CoveringQuery, a query whose required number of matching + clauses can be defined per document. (Adrien Grand) + Optimizations * LUCENE-7905: Optimize how OrdinalMap (used by @@ -28,6 +33,9 @@ Optimizations than 8x greater than the cost of the lead iterator in order to use doc values. (Murali Krishna P via Adrien Grand) +* LUCENE-7925: Collapse duplicate SHOULD or MUST clauses by summing up their + boosts. (Adrien Grand) + Bug Fixes * LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used @@ -35,6 +43,12 @@ Bug Fixes not recommended, lucene-analyzers-icu contains binary data structures specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir) +Build + +* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first + instead of locally installing first, to workaround a double repository push of + *-sources.jar and *-javadoc.jar files. 
(Lynn Monson via Steve Rowe) + ======================= Lucene 7.0.0 ======================= New Features diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 50a758b32f8..c0965af5aa5 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -559,10 +559,6 @@ - - - - @@ -570,6 +566,10 @@ + + + + diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index b1f507a05c2..a3f5ae0416d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -23,6 +23,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.EnumMap; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -332,6 +333,69 @@ public class BooleanQuery extends Query implements Iterable { } } + // Deduplicate SHOULD clauses by summing up their boosts + if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) { + Map shouldClauses = new HashMap<>(); + for (Query query : clauseSets.get(Occur.SHOULD)) { + double boost = 1; + while (query instanceof BoostQuery) { + BoostQuery bq = (BoostQuery) query; + boost *= bq.getBoost(); + query = bq.getQuery(); + } + shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost); + } + if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) { + BooleanQuery.Builder builder = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(minimumNumberShouldMatch); + for (Map.Entry entry : shouldClauses.entrySet()) { + Query query = entry.getKey(); + float boost = entry.getValue().floatValue(); + if (boost != 1f) { + query = new BoostQuery(query, boost); + } + builder.add(query, Occur.SHOULD); + } + for (BooleanClause clause : clauses) { + if (clause.getOccur() != Occur.SHOULD) { + builder.add(clause); + } + } + return builder.build(); + } + 
} + + // Deduplicate MUST clauses by summing up their boosts + if (clauseSets.get(Occur.MUST).size() > 0) { + Map<Query, Double> mustClauses = new HashMap<>(); + for (Query query : clauseSets.get(Occur.MUST)) { + double boost = 1; + while (query instanceof BoostQuery) { + BoostQuery bq = (BoostQuery) query; + boost *= bq.getBoost(); + query = bq.getQuery(); + } + mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost); + } + if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) { + BooleanQuery.Builder builder = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(minimumNumberShouldMatch); + for (Map.Entry<Query, Double> entry : mustClauses.entrySet()) { + Query query = entry.getKey(); + float boost = entry.getValue().floatValue(); + if (boost != 1f) { + query = new BoostQuery(query, boost); + } + builder.add(query, Occur.MUST); + } + for (BooleanClause clause : clauses) { + if (clause.getOccur() != Occur.MUST) { + builder.add(clause); + } + } + return builder.build(); + } + } // Rewrite queries whose single scoring clause is a MUST clause on a // MatchAllDocsQuery to a ConstantScoreQuery diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 1fd3d4b859d..9337bf7104a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -427,4 +427,57 @@ public class TestBooleanRewrites extends LuceneTestCase { assertEquals(expectedScore, actualScore, expectedScore / 100); // error under 1% } } + + public void testDeduplicateShouldClauses() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + + Query query = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .build(); + Query expected = new BoostQuery(new TermQuery(new Term("foo", "bar")), 2); + 
assertEquals(expected, searcher.rewrite(query)); + + query = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 2), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD) + .build(); + expected = new BooleanQuery.Builder() + .add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 3), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD) + .build(); + assertEquals(expected, searcher.rewrite(query)); + + query = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(2) + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD) + .build(); + expected = query; + assertEquals(expected, searcher.rewrite(query)); + } + + public void testDeduplicateMustClauses() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + + Query query = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) + .build(); + Query expected = new BoostQuery(new TermQuery(new Term("foo", "bar")), 2); + assertEquals(expected, searcher.rewrite(query)); + + query = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) + .add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 2), Occur.MUST) + .add(new TermQuery(new Term("foo", "quux")), Occur.MUST) + .build(); + expected = new BooleanQuery.Builder() + .add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 3), Occur.MUST) + .add(new TermQuery(new Term("foo", "quux")), Occur.MUST) + .build(); + assertEquals(expected, searcher.rewrite(query)); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java index 7671cc08570..f97afd5b6e3 
100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -90,7 +90,8 @@ public class TestConstantScoreQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter (random(), directory); Document doc = new Document(); - doc.add(newStringField("field", "term", Field.Store.NO)); + doc.add(newStringField("field", "term1", Field.Store.NO)); + doc.add(newStringField("field", "term2", Field.Store.NO)); writer.addDocument(doc); reader = writer.getReader(); @@ -99,8 +100,8 @@ public class TestConstantScoreQuery extends LuceneTestCase { searcher = newSearcher(reader, true, false); searcher.setQueryCache(null); // to assert on scorer impl - final BoostQuery csq1 = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term"))), 2f); - final BoostQuery csq2 = new BoostQuery(new ConstantScoreQuery(csq1), 5f); + final BoostQuery csq1 = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term1"))), 2f); + final BoostQuery csq2 = new BoostQuery(new ConstantScoreQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term2")))), 5f); final BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(csq1, BooleanClause.Occur.SHOULD); diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java new file mode 100644 index 00000000000..288e05b05bf --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; + +/** A {@link Query} that allows to have a configurable number or required + * matches per document. This is typically useful in order to build queries + * whose query terms must all appear in documents. + * @lucene.experimental + */ +public final class CoveringQuery extends Query { + + private final Collection queries; + private final LongValuesSource minimumNumberMatch; + private final int hashCode; + + /** + * Sole constructor. + * @param queries Sub queries to match. + * @param minimumNumberMatch Per-document long value that records how many queries + * should match. Values that are less than 1 are treated + * like 1: only documents that have at least one + * matching clause will be considered matches. Documents + * that do not have a value for minimumNumberMatch + * do not match. 
+ */ + public CoveringQuery(Collection<Query> queries, LongValuesSource minimumNumberMatch) { + if (queries.size() > BooleanQuery.getMaxClauseCount()) { + throw new BooleanQuery.TooManyClauses(); + } + if (minimumNumberMatch.needsScores()) { + throw new IllegalArgumentException("The minimum number of matches may not depend on the score."); + } + this.queries = new Multiset<>(); + this.queries.addAll(queries); + this.minimumNumberMatch = Objects.requireNonNull(minimumNumberMatch); + this.hashCode = computeHashCode(); + } + + @Override + public String toString(String field) { + String queriesToString = queries.stream() + .map(q -> q.toString(field)) + .sorted() + .collect(Collectors.joining(", ")); + return "CoveringQuery(queries=[" + queriesToString + "], minimumNumberMatch=" + minimumNumberMatch + ")"; + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + CoveringQuery that = (CoveringQuery) obj; + return hashCode == that.hashCode // not necessary but makes equals faster + && Objects.equals(queries, that.queries) + && Objects.equals(minimumNumberMatch, that.minimumNumberMatch); + } + + private int computeHashCode() { + int h = classHash(); + h = 31 * h + queries.hashCode(); + h = 31 * h + minimumNumberMatch.hashCode(); + return h; + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Multiset<Query> rewritten = new Multiset<>(); + boolean actuallyRewritten = false; + for (Query query : queries) { + Query r = query.rewrite(reader); + rewritten.add(r); + actuallyRewritten |= query != r; + } + if (actuallyRewritten) { + return new CoveringQuery(rewritten, minimumNumberMatch); + } + return super.rewrite(reader); + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { + final List<Weight> weights = new ArrayList<>(queries.size()); + for (Query query : queries) { + 
weights.add(searcher.createWeight(query, needsScores, boost)); + } + return new CoveringWeight(this, weights, minimumNumberMatch); + } + + private static class CoveringWeight extends Weight { + + private final Collection weights; + private final LongValuesSource minimumNumberMatch; + + CoveringWeight(Query query, Collection weights, LongValuesSource minimumNumberMatch) { + super(query); + this.weights = weights; + this.minimumNumberMatch = minimumNumberMatch; + } + + @Override + public void extractTerms(Set terms) { + for (Weight weight : weights) { + weight.extractTerms(terms); + } + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + LongValues minMatchValues = minimumNumberMatch.getValues(context, null); + if (minMatchValues.advanceExact(doc) == false) { + return Explanation.noMatch("minimumNumberMatch has no value on the current document"); + } + final long minimumNumberMatch = Math.max(1, minMatchValues.longValue()); + int freq = 0; + double score = 0; + List subExpls = new ArrayList<>(); + for (Weight weight : weights) { + Explanation subExpl = weight.explain(context, doc); + if (subExpl.isMatch()) { + freq++; + score += subExpl.getValue(); + } + subExpls.add(subExpl); + } + if (freq >= minimumNumberMatch) { + return Explanation.match((float) score, freq + " matches for " + minimumNumberMatch + " required matches, sum of:", subExpls); + } else { + return Explanation.noMatch(freq + " matches for " + minimumNumberMatch + " required matches", subExpls); + } + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + Collection scorers = new ArrayList<>(); + for (Weight w : weights) { + Scorer s = w.scorer(context); + if (s != null) { + scorers.add(s); + } + } + if (scorers.isEmpty()) { + return null; + } + return new CoveringScorer(this, scorers, minimumNumberMatch.getValues(context, null), context.reader().maxDoc()); + } + } + +} diff --git 
a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringScorer.java b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringScorer.java new file mode 100644 index 00000000000..8f62d236d46 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringScorer.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** A {@link Scorer} whose number of matches is per-document. 
*/ +final class CoveringScorer extends Scorer { + + final int numScorers; + final int maxDoc; + final LongValues minMatchValues; + + boolean matches; // if true then the doc matches, otherwise we don't know and need to check + int doc; // current doc ID + DisiWrapper topList; // list of matches + int freq; // number of scorers on the desired doc ID + long minMatch; // current required number of matches + + // priority queue that stores all scorers + final DisiPriorityQueue subScorers; + + final long cost; + + CoveringScorer(Weight weight, Collection scorers, LongValues minMatchValues, int maxDoc) { + super(weight); + + this.numScorers = scorers.size(); + this.maxDoc = maxDoc; + this.minMatchValues = minMatchValues; + this.doc = -1; + + subScorers = new DisiPriorityQueue(scorers.size()); + + for (Scorer scorer : scorers) { + subScorers.add(new DisiWrapper(scorer)); + } + + this.cost = scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost).sum(); + } + + @Override + public final Collection getChildren() throws IOException { + List matchingChildren = new ArrayList<>(); + setTopListAndFreqIfNecessary(); + for (DisiWrapper s = topList; s != null; s = s.next) { + matchingChildren.add(new ChildScorer(s.scorer, "SHOULD")); + } + return matchingChildren; + } + + private final DocIdSetIterator approximation = new DocIdSetIterator() { + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() throws IOException { + return advance(docID() + 1); + } + + @Override + public int advance(int target) throws IOException { + // reset state + matches = false; + topList = null; + + doc = target; + setMinMatch(); + + DisiWrapper top = subScorers.top(); + int numMatches = 0; + int maxPotentialMatches = numScorers; + while (top.doc < target) { + if (maxPotentialMatches < minMatch) { + // No need to keep trying to advance to `target` since no match is possible. 
+ if (target >= maxDoc - 1) { + doc = NO_MORE_DOCS; + } else { + doc = target + 1; + } + setMinMatch(); + return doc; + } + top.doc = top.iterator.advance(target); + boolean match = top.doc == target; + top = subScorers.updateTop(); + if (match) { + numMatches++; + if (numMatches >= minMatch) { + // success, no need to check other iterators + matches = true; + return doc; + } + } else { + maxPotentialMatches--; + } + } + + doc = top.doc; + setMinMatch(); + return doc; + } + + private void setMinMatch() throws IOException { + if (doc >= maxDoc) { + // advanceExact may not be called on out-of-range doc ids + minMatch = 1; + } else if (minMatchValues.advanceExact(doc)) { + // values < 1 are treated as 1: we require at least one match + minMatch = Math.max(1, minMatchValues.longValue()); + } else { + // this will make sure the document does not match + minMatch = Long.MAX_VALUE; + } + } + + @Override + public long cost() { + return maxDoc; + } + + }; + + private final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { + + @Override + public boolean matches() throws IOException { + if (matches) { + return true; + } + if (topList == null) { + advanceAll(doc); + } + if (subScorers.top().doc != doc) { + assert subScorers.top().doc > doc; + return false; + } + setTopListAndFreq(); + assert topList.doc == doc; + return matches = freq >= minMatch; + } + + @Override + public float matchCost() { + return numScorers; + } + + }; + + @Override + public DocIdSetIterator iterator() { + return TwoPhaseIterator.asDocIdSetIterator(twoPhase); + } + + @Override + public TwoPhaseIterator twoPhaseIterator() { + return twoPhase; + } + + private void advanceAll(int target) throws IOException { + DisiWrapper top = subScorers.top(); + while (top.doc < target) { + top.doc = top.iterator.advance(target); + top = subScorers.updateTop(); + } + } + + private void setTopListAndFreq() { + topList = subScorers.topList(); + freq = 0; + for (DisiWrapper w = topList; w != null; w = w.next) 
{ + freq++; + } + } + + private void setTopListAndFreqIfNecessary() throws IOException { + if (topList == null) { + advanceAll(doc); + setTopListAndFreq(); + } + } + + @Override + public int freq() throws IOException { + setTopListAndFreqIfNecessary(); + return freq; + } + + @Override + public float score() throws IOException { + // we need to know about all matches + setTopListAndFreqIfNecessary(); + double score = 0; + for (DisiWrapper w = topList; w != null; w = w.next) { + score += w.scorer.score(); + } + return (float) score; + } + + @Override + public int docID() { + return doc; + } + +} diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestCoveringQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestCoveringQuery.java new file mode 100644 index 00000000000..29422896bfa --- /dev/null +++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestCoveringQuery.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestCoveringQuery extends LuceneTestCase { + + public void testEquals() { + TermQuery tq1 = new TermQuery(new Term("foo", "bar")); + TermQuery tq2 = new TermQuery(new Term("foo", "quux")); + LongValuesSource vs = LongValuesSource.fromLongField("field"); + + CoveringQuery q1 = new CoveringQuery(Arrays.asList(tq1, tq2), vs); + CoveringQuery q2 = new CoveringQuery(Arrays.asList(tq1, tq2), vs); + QueryUtils.checkEqual(q1, q2); + + // order does not matter + CoveringQuery q3 = new CoveringQuery(Arrays.asList(tq2, tq1), vs); + QueryUtils.checkEqual(q1, q3); + + // values source matters + CoveringQuery q4 = new CoveringQuery(Arrays.asList(tq2, tq1), LongValuesSource.fromLongField("other_field")); + QueryUtils.checkUnequal(q1, q4); + + // duplicates matter + CoveringQuery q5 = new CoveringQuery(Arrays.asList(tq1, tq1, tq2), vs); + CoveringQuery q6 = new CoveringQuery(Arrays.asList(tq1, tq2, tq2), vs); + QueryUtils.checkUnequal(q5, q6); + + // query matters + CoveringQuery q7 = new CoveringQuery(Arrays.asList(tq1), vs); + CoveringQuery q8 = new CoveringQuery(Arrays.asList(tq2), vs); + QueryUtils.checkUnequal(q7, q8); + } + + public void testRewrite() throws IOException { + PhraseQuery pq = new PhraseQuery("foo", "bar"); + 
TermQuery tq = new TermQuery(new Term("foo", "bar")); + LongValuesSource vs = LongValuesSource.fromIntField("field"); + assertEquals( + new CoveringQuery(Collections.singleton(tq), vs), + new CoveringQuery(Collections.singleton(pq), vs).rewrite(new MultiReader())); + } + + public void testToString() { + TermQuery tq1 = new TermQuery(new Term("foo", "bar")); + TermQuery tq2 = new TermQuery(new Term("foo", "quux")); + LongValuesSource vs = LongValuesSource.fromIntField("field"); + CoveringQuery q = new CoveringQuery(Arrays.asList(tq1, tq2), vs); + assertEquals("CoveringQuery(queries=[foo:bar, foo:quux], minimumNumberMatch=long(field))", q.toString()); + assertEquals("CoveringQuery(queries=[bar, quux], minimumNumberMatch=long(field))", q.toString("foo")); + } + + public void testRandom() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); + int numDocs = atLeast(200); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + if (random().nextBoolean()) { + doc.add(new StringField("field", "A", Store.NO)); + } + if (random().nextBoolean()) { + doc.add(new StringField("field", "B", Store.NO)); + } + if (random().nextDouble() > 0.9) { + doc.add(new StringField("field", "C", Store.NO)); + } + if (random().nextDouble() > 0.1) { + doc.add(new StringField("field", "D", Store.NO)); + } + doc.add(new NumericDocValuesField("min_match", random().nextInt(6))); + w.addDocument(doc); + } + + IndexReader r = DirectoryReader.open(w); + IndexSearcher searcher = new IndexSearcher(r); + w.close(); + + int iters = atLeast(10); + for (int iter = 0; iter < iters; ++iter) { + List queries = new ArrayList<>(); + if (random().nextBoolean()) { + queries.add(new TermQuery(new Term("field", "A"))); + } + if (random().nextBoolean()) { + queries.add(new TermQuery(new Term("field", "B"))); + } + if (random().nextBoolean()) { + queries.add(new TermQuery(new Term("field", "C"))); + } + if (random().nextBoolean()) { + 
queries.add(new TermQuery(new Term("field", "D"))); + } + if (random().nextBoolean()) { + queries.add(new TermQuery(new Term("field", "E"))); + } + + Query q = new CoveringQuery(queries, LongValuesSource.fromLongField("min_match")); + QueryUtils.check(random(), q, searcher); + + for (int i = 1; i < 4; ++i) { + BooleanQuery.Builder builder = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(i); + for (Query query : queries) { + builder.add(query, Occur.SHOULD); + } + Query q1 = builder.build(); + Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i)); + assertEquals( + searcher.count(q1), + searcher.count(q2)); + } + + Query filtered = new BooleanQuery.Builder() + .add(q, Occur.MUST) + .add(new TermQuery(new Term("field", "A")), Occur.MUST) + .build(); + QueryUtils.check(random(), filtered, searcher); + } + + r.close(); + dir.close(); + } +} diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d2291170c67..a042d1cae94 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -296,6 +296,10 @@ Upgrading from Solr 6.x * V2 APIs are now available at /api, in addition to /v2 (which is now deprecated). Legacy APIs continue to remain available at /solr. +* Solr was tested and is compatible with the final release candidate of Java 9. All startup scripts + detect Java 9 correctly and setup Garbage Collector logging. If the configuration file contains + logging options that are no longer supported with Java 9, startup will fail. + New Features ---------------------- * SOLR-9857, SOLR-9858: Collect aggregated metrics from nodes and shard leaders in overseer. 
(ab) @@ -473,6 +477,8 @@ Bug Fixes * SOLR-11268: AtomicUpdateProcessor complains missing UpdateLog (noble, Ishan Chattopadhyaya) +* SOLR-8689: Fix bin/solr.cmd so it can run properly on Java 9 (Uwe Schindler, hossman) + Optimizations ---------------------- diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd index ce2350b2938..8baff8008f6 100644 --- a/solr/bin/solr.cmd +++ b/solr/bin/solr.cmd @@ -1113,14 +1113,28 @@ IF "%GC_TUNE%"=="" ( -XX:-OmitStackTraceInFastThrow ) -IF "%GC_LOG_OPTS%"=="" ( - set GC_LOG_OPTS=-verbose:gc ^ - -XX:+PrintHeapAtGC ^ - -XX:+PrintGCDetails ^ - -XX:+PrintGCDateStamps ^ - -XX:+PrintGCTimeStamps ^ - -XX:+PrintTenuringDistribution ^ - -XX:+PrintGCApplicationStoppedTime +if !JAVA_MAJOR_VERSION! GEQ 9 ( + IF NOT "%GC_LOG_OPTS%"=="" ( + echo ERROR: On Java 9 you cannot set GC_LOG_OPTS, only default GC logging is available. Exiting + GOTO :eof + ) + set GC_LOG_OPTS="-Xlog:gc*:file=\"!SOLR_LOGS_DIR!\solr_gc.log\":time,uptime:filecount=9,filesize=20000" +) else ( + IF "%GC_LOG_OPTS%"=="" ( + rem Set defaults for Java 8 + set GC_LOG_OPTS=-verbose:gc ^ + -XX:+PrintHeapAtGC ^ + -XX:+PrintGCDetails ^ + -XX:+PrintGCDateStamps ^ + -XX:+PrintGCTimeStamps ^ + -XX:+PrintTenuringDistribution ^ + -XX:+PrintGCApplicationStoppedTime + ) + if "%JAVA_VENDOR%" == "IBM J9" ( + set GC_LOG_OPTS=!GC_LOG_OPTS! "-Xverbosegclog:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M + ) else ( + set GC_LOG_OPTS=!GC_LOG_OPTS! 
"-Xloggc:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M + ) ) IF "%verbose%"=="1" ( @@ -1207,26 +1221,20 @@ IF NOT EXIST "%SOLR_SERVER_DIR%\tmp" ( mkdir "%SOLR_SERVER_DIR%\tmp" ) -IF "%JAVA_VENDOR%" == "IBM J9" ( - set GCLOG_OPT="-Xverbosegclog:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M -) else ( - set GCLOG_OPT="-Xloggc:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M -) - IF "%DEFAULT_CONFDIR%"=="" set "DEFAULT_CONFDIR=%SOLR_SERVER_DIR%\solr\configsets\_default\conf" IF "%FG%"=="1" ( REM run solr in the foreground title "Solr-%SOLR_PORT%" echo %SOLR_PORT%>"%SOLR_TIP%"\bin\solr-%SOLR_PORT%.port - "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT% ^ + "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% ^ -Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^ -Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" -Dsolr.default.confdir="%DEFAULT_CONFDIR%" ^ -Djetty.host=%SOLR_JETTY_HOST% -Djetty.port=%SOLR_PORT% -Djetty.home="%SOLR_SERVER_DIR%" ^ -Djava.io.tmpdir="%SOLR_SERVER_DIR%\tmp" -jar start.jar "%SOLR_JETTY_CONFIG%" "%SOLR_JETTY_ADDL_CONFIG%" ) ELSE ( START /B "Solr-%SOLR_PORT%" /D "%SOLR_SERVER_DIR%" ^ - "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT% ^ + "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% ^ -Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^ -Dsolr.log.muteconsole ^ -Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" -Dsolr.default.confdir="%DEFAULT_CONFDIR%" ^ @@ -1843,8 +1851,12 @@ FOR /f "usebackq tokens=3" %%a IN (`^""%JAVA%" -version 2^>^&1 ^| findstr "versi set JAVA_VERSION_INFO=!JAVA_VERSION_INFO:"=! REM Extract the major Java version, e.g. 7, 8, 9, 10 ... - for /f "tokens=2 delims=." 
%%a in ("!JAVA_VERSION_INFO!") do ( - set JAVA_MAJOR_VERSION=%%a + for /f "tokens=1,2 delims=." %%a in ("!JAVA_VERSION_INFO!") do ( + if "%%a" GEQ "9" ( + set JAVA_MAJOR_VERSION=%%a + ) else ( + set JAVA_MAJOR_VERSION=%%b + ) ) REM Don't look for "_{build}" if we're on IBM J9. diff --git a/solr/bin/solr.in.cmd b/solr/bin/solr.in.cmd index 60712639f99..afba4f75ab5 100644 --- a/solr/bin/solr.in.cmd +++ b/solr/bin/solr.in.cmd @@ -27,7 +27,9 @@ REM set SOLR_JAVA_HOME= REM Increase Java Min/Max Heap as needed to support your indexing / query needs REM set SOLR_JAVA_MEM=-Xms512m -Xmx512m -REM Enable verbose GC logging +REM Configure verbose GC logging: +REM For Java 8: if this is set, additional params will be added to specify the log file & rotation +REM For Java 9 or higher: GC_LOG_OPTS is currently not supported. If you set it, the startup script will exit with failure. REM set GC_LOG_OPTS=-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime REM Various GC settings have shown to work well for a number of common Solr workloads. diff --git a/solr/bin/solr.in.sh b/solr/bin/solr.in.sh index 060ece9149e..514efb1ad9d 100644 --- a/solr/bin/solr.in.sh +++ b/solr/bin/solr.in.sh @@ -36,12 +36,12 @@ # Enable verbose GC logging... # * If this is unset, various default options will be selected depending on which JVM version is in use -# * For java8 or lower: if this is set, additional params will be added to specify the log file & rotation -# * For java9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an output -# specifier, will have a 'file' output specifier (as well as formatting & rollover options) appended, -# using the effective value of the SOLR_LOGS_DIR. 
+# * For Java 8: if this is set, additional params will be added to specify the log file & rotation +# * For Java 9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an +# output specifier, will have a 'file' output specifier (as well as formatting & rollover options) +# appended, using the effective value of the SOLR_LOGS_DIR. # -#GC_LOG_OPTS='-Xlog:gc*' # (java9) +#GC_LOG_OPTS='-Xlog:gc*' # (Java 9+) #GC_LOG_OPTS="-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails \ # -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime"