Cao Manh Dat 2017-08-22 10:08:18 +07:00
commit ebc3916b04
12 changed files with 745 additions and 31 deletions

View File

@@ -15,6 +15,11 @@ Changes in Runtime Behavior
======================= Lucene 7.1.0 =======================
New Features
* LUCENE-7621: Add CoveringQuery, a query whose required number of matching
clauses can be defined per document. (Adrien Grand)
Optimizations
* LUCENE-7905: Optimize how OrdinalMap (used by
@@ -28,6 +33,9 @@ Optimizations
than 8x greater than the cost of the lead iterator in order to use doc values.
(Murali Krishna P via Adrien Grand)
* LUCENE-7925: Collapse duplicate SHOULD or MUST clauses by summing up their
boosts. (Adrien Grand)
Bug Fixes
* LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used
@@ -35,6 +43,12 @@ Bug Fixes
not recommended, lucene-analyzers-icu contains binary data structures
specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir)
Build
* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first
instead of locally installing first, to work around a double repository push of
*-sources.jar and *-javadoc.jar files. (Lynn Monson via Steve Rowe)
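As a quick sketch of the LUCENE-7925 rewrite noted above (field and term names are illustrative, and the usual org.apache.lucene.search imports and BooleanClause.Occur are assumed; the new TestBooleanRewrites cases later in this commit verify the same behavior):

Query q = new BooleanQuery.Builder()
    .add(new TermQuery(new Term("f", "a")), Occur.SHOULD)
    .add(new TermQuery(new Term("f", "a")), Occur.SHOULD)
    .build();
// IndexSearcher.rewrite(q) now collapses the duplicates into a single
// boosted clause, equivalent to:
// new BoostQuery(new TermQuery(new Term("f", "a")), 2)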
======================= Lucene 7.0.0 =======================
New Features

View File

@@ -559,10 +559,6 @@
<artifact:install-provider artifactId="wagon-ssh" version="1.0-beta-7"/>
<parent-poms/>
<artifact:pom id="maven.project" file="@{pom.xml}"/>
-<artifact:install file="@{jar.file}">
-  <artifact-attachments/>
-  <pom refid="maven.project"/>
-</artifact:install>
<artifact:deploy file="@{jar.file}">
<artifact-attachments/>
<remoteRepository id="${m2.repository.id}" url="${m2.repository.url}">
@@ -570,6 +566,10 @@
</remoteRepository>
<pom refid="maven.project"/>
</artifact:deploy>
+<artifact:install file="@{jar.file}">
+  <artifact-attachments/>
+  <pom refid="maven.project"/>
+</artifact:install>
</sequential>
</macrodef>

View File

@@ -23,6 +23,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -332,6 +333,69 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
}
}
// Deduplicate SHOULD clauses by summing up their boosts
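// (Only safe when minimumNumberShouldMatch <= 1: merging duplicates changes the
// clause count, which would change the meaning of a higher minimum-should-match.)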
if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) {
Map<Query, Double> shouldClauses = new HashMap<>();
for (Query query : clauseSets.get(Occur.SHOULD)) {
double boost = 1;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
boost *= bq.getBoost();
query = bq.getQuery();
}
shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost);
}
if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) {
BooleanQuery.Builder builder = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Query,Double> entry : shouldClauses.entrySet()) {
Query query = entry.getKey();
float boost = entry.getValue().floatValue();
if (boost != 1f) {
query = new BoostQuery(query, boost);
}
builder.add(query, Occur.SHOULD);
}
for (BooleanClause clause : clauses) {
if (clause.getOccur() != Occur.SHOULD) {
builder.add(clause);
}
}
return builder.build();
}
}
// Deduplicate MUST clauses by summing up their boosts
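// (Unlike SHOULD, no minimum-should-match guard is needed here: MUST clauses
// are all required regardless of how many there are.)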
if (clauseSets.get(Occur.MUST).size() > 0) {
Map<Query, Double> mustClauses = new HashMap<>();
for (Query query : clauseSets.get(Occur.MUST)) {
double boost = 1;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
boost *= bq.getBoost();
query = bq.getQuery();
}
mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost);
}
if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) {
BooleanQuery.Builder builder = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Query,Double> entry : mustClauses.entrySet()) {
Query query = entry.getKey();
float boost = entry.getValue().floatValue();
if (boost != 1f) {
query = new BoostQuery(query, boost);
}
builder.add(query, Occur.MUST);
}
for (BooleanClause clause : clauses) {
if (clause.getOccur() != Occur.MUST) {
builder.add(clause);
}
}
return builder.build();
}
}
// Rewrite queries whose single scoring clause is a MUST clause on a
// MatchAllDocsQuery to a ConstantScoreQuery

View File

@@ -427,4 +427,57 @@ public class TestBooleanRewrites extends LuceneTestCase {
assertEquals(expectedScore, actualScore, expectedScore / 100); // error under 1%
}
}
public void testDeduplicateShouldClauses() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
Query query = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.build();
Query expected = new BoostQuery(new TermQuery(new Term("foo", "bar")), 2);
assertEquals(expected, searcher.rewrite(query));
query = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 2), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
.build();
expected = new BooleanQuery.Builder()
.add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 3), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
.build();
assertEquals(expected, searcher.rewrite(query));
query = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(2)
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
.build();
expected = query;
assertEquals(expected, searcher.rewrite(query));
}
public void testDeduplicateMustClauses() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
Query query = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.build();
Query expected = new BoostQuery(new TermQuery(new Term("foo", "bar")), 2);
assertEquals(expected, searcher.rewrite(query));
query = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 2), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
.build();
expected = new BooleanQuery.Builder()
.add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 3), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
.build();
assertEquals(expected, searcher.rewrite(query));
}
}

View File

@@ -90,7 +90,8 @@ public class TestConstantScoreQuery extends LuceneTestCase {
RandomIndexWriter writer = new RandomIndexWriter (random(), directory);
Document doc = new Document();
-doc.add(newStringField("field", "term", Field.Store.NO));
+doc.add(newStringField("field", "term1", Field.Store.NO));
+doc.add(newStringField("field", "term2", Field.Store.NO));
writer.addDocument(doc);
reader = writer.getReader();
@@ -99,8 +100,8 @@ public class TestConstantScoreQuery extends LuceneTestCase {
searcher = newSearcher(reader, true, false);
searcher.setQueryCache(null); // to assert on scorer impl
-final BoostQuery csq1 = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term"))), 2f);
-final BoostQuery csq2 = new BoostQuery(new ConstantScoreQuery(csq1), 5f);
+final BoostQuery csq1 = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term1"))), 2f);
+final BoostQuery csq2 = new BoostQuery(new ConstantScoreQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term2")))), 5f);
final BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(csq1, BooleanClause.Occur.SHOULD);

View File

@@ -0,0 +1,180 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
/** A {@link Query} that allows a configurable number of required
* matches per document. This is typically useful to build queries
* whose query terms must all appear in matching documents.
* @lucene.experimental
*/
public final class CoveringQuery extends Query {
private final Collection<Query> queries;
private final LongValuesSource minimumNumberMatch;
private final int hashCode;
/**
* Sole constructor.
* @param queries Sub queries to match.
* @param minimumNumberMatch Per-document long value that records how many queries
* should match. Values that are less than 1 are treated
* like <tt>1</tt>: only documents that have at least one
* matching clause will be considered matches. Documents
* that do not have a value for <tt>minimumNumberMatch</tt>
* do not match.
*/
public CoveringQuery(Collection<Query> queries, LongValuesSource minimumNumberMatch) {
if (queries.size() > BooleanQuery.getMaxClauseCount()) {
throw new BooleanQuery.TooManyClauses();
}
if (minimumNumberMatch.needsScores()) {
throw new IllegalArgumentException("The minimum number of matches may not depend on the score.");
}
this.queries = new Multiset<>();
this.queries.addAll(queries);
this.minimumNumberMatch = Objects.requireNonNull(minimumNumberMatch);
this.hashCode = computeHashCode();
}
@Override
public String toString(String field) {
String queriesToString = queries.stream()
.map(q -> q.toString(field))
.sorted()
.collect(Collectors.joining(", "));
return "CoveringQuery(queries=[" + queriesToString + "], minimumNumberMatch=" + minimumNumberMatch + ")";
}
@Override
public boolean equals(Object obj) {
if (sameClassAs(obj) == false) {
return false;
}
CoveringQuery that = (CoveringQuery) obj;
return hashCode == that.hashCode // not necessary but makes equals faster
&& Objects.equals(queries, that.queries)
&& Objects.equals(minimumNumberMatch, that.minimumNumberMatch);
}
private int computeHashCode() {
int h = classHash();
h = 31 * h + queries.hashCode();
h = 31 * h + minimumNumberMatch.hashCode();
return h;
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Multiset<Query> rewritten = new Multiset<>();
boolean actuallyRewritten = false;
for (Query query : queries) {
Query r = query.rewrite(reader);
rewritten.add(r);
actuallyRewritten |= query != r;
}
if (actuallyRewritten) {
return new CoveringQuery(rewritten, minimumNumberMatch);
}
return super.rewrite(reader);
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
final List<Weight> weights = new ArrayList<>(queries.size());
for (Query query : queries) {
weights.add(searcher.createWeight(query, needsScores, boost));
}
return new CoveringWeight(this, weights, minimumNumberMatch);
}
private static class CoveringWeight extends Weight {
private final Collection<Weight> weights;
private final LongValuesSource minimumNumberMatch;
CoveringWeight(Query query, Collection<Weight> weights, LongValuesSource minimumNumberMatch) {
super(query);
this.weights = weights;
this.minimumNumberMatch = minimumNumberMatch;
}
@Override
public void extractTerms(Set<Term> terms) {
for (Weight weight : weights) {
weight.extractTerms(terms);
}
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
LongValues minMatchValues = minimumNumberMatch.getValues(context, null);
if (minMatchValues.advanceExact(doc) == false) {
return Explanation.noMatch("minimumNumberMatch has no value on the current document");
}
final long minimumNumberMatch = Math.max(1, minMatchValues.longValue());
int freq = 0;
double score = 0;
List<Explanation> subExpls = new ArrayList<>();
for (Weight weight : weights) {
Explanation subExpl = weight.explain(context, doc);
if (subExpl.isMatch()) {
freq++;
score += subExpl.getValue();
}
subExpls.add(subExpl);
}
if (freq >= minimumNumberMatch) {
return Explanation.match((float) score, freq + " matches for " + minimumNumberMatch + " required matches, sum of:", subExpls);
} else {
return Explanation.noMatch(freq + " matches for " + minimumNumberMatch + " required matches", subExpls);
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
Collection<Scorer> scorers = new ArrayList<>();
for (Weight w : weights) {
Scorer s = w.scorer(context);
if (s != null) {
scorers.add(s);
}
}
if (scorers.isEmpty()) {
return null;
}
return new CoveringScorer(this, scorers, minimumNumberMatch.getValues(context, null), context.reader().maxDoc());
}
}
}
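A minimal usage sketch for the new query (the `searcher`, the "body" field, and the "min_match" doc-values field are illustrative assumptions, not part of this commit; the CoveringQuery constructor and LongValuesSource.fromLongField are as defined in this commit):

// Match documents where at least `min_match` (read per document from
// doc values) of the following terms occur; assumes the usual
// org.apache.lucene.search / org.apache.lucene.index imports.
List<Query> clauses = Arrays.asList(
    new TermQuery(new Term("body", "lucene")),
    new TermQuery(new Term("body", "covering")),
    new TermQuery(new Term("body", "query")));
Query q = new CoveringQuery(clauses, LongValuesSource.fromLongField("min_match"));
TopDocs hits = searcher.search(q, 10);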

View File

@@ -0,0 +1,225 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/** A {@link Scorer} whose number of matches is per-document. */
final class CoveringScorer extends Scorer {
final int numScorers;
final int maxDoc;
final LongValues minMatchValues;
boolean matches; // if true then the doc matches, otherwise we don't know and need to check
int doc; // current doc ID
DisiWrapper topList; // list of matches
int freq; // number of scorers on the desired doc ID
long minMatch; // current required number of matches
// priority queue that stores all scorers
final DisiPriorityQueue subScorers;
final long cost;
CoveringScorer(Weight weight, Collection<Scorer> scorers, LongValues minMatchValues, int maxDoc) {
super(weight);
this.numScorers = scorers.size();
this.maxDoc = maxDoc;
this.minMatchValues = minMatchValues;
this.doc = -1;
subScorers = new DisiPriorityQueue(scorers.size());
for (Scorer scorer : scorers) {
subScorers.add(new DisiWrapper(scorer));
}
this.cost = scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost).sum();
}
@Override
public final Collection<ChildScorer> getChildren() throws IOException {
List<ChildScorer> matchingChildren = new ArrayList<>();
setTopListAndFreqIfNecessary();
for (DisiWrapper s = topList; s != null; s = s.next) {
matchingChildren.add(new ChildScorer(s.scorer, "SHOULD"));
}
return matchingChildren;
}
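// The approximation below may stop on doc IDs that have not been verified to
// have enough matching sub-scorers; the two-phase matches() further down
// confirms minMatch before a document is accepted.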
private final DocIdSetIterator approximation = new DocIdSetIterator() {
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(docID() + 1);
}
@Override
public int advance(int target) throws IOException {
// reset state
matches = false;
topList = null;
doc = target;
setMinMatch();
DisiWrapper top = subScorers.top();
int numMatches = 0;
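// Number of sub-scorers that could still be on `target`; once it drops
// below minMatch, no match at `target` is possible.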
int maxPotentialMatches = numScorers;
while (top.doc < target) {
if (maxPotentialMatches < minMatch) {
// No need to keep trying to advance to `target` since no match is possible.
if (target >= maxDoc - 1) {
doc = NO_MORE_DOCS;
} else {
doc = target + 1;
}
setMinMatch();
return doc;
}
top.doc = top.iterator.advance(target);
boolean match = top.doc == target;
top = subScorers.updateTop();
if (match) {
numMatches++;
if (numMatches >= minMatch) {
// success, no need to check other iterators
matches = true;
return doc;
}
} else {
maxPotentialMatches--;
}
}
doc = top.doc;
setMinMatch();
return doc;
}
private void setMinMatch() throws IOException {
if (doc >= maxDoc) {
// advanceExact may not be called on out-of-range doc ids
minMatch = 1;
} else if (minMatchValues.advanceExact(doc)) {
// values < 1 are treated as 1: we require at least one match
minMatch = Math.max(1, minMatchValues.longValue());
} else {
// this will make sure the document does not match
minMatch = Long.MAX_VALUE;
}
}
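// advance() above may step one doc at a time when a match is ruled out,
// so in the worst case this iterator visits every doc ID up to maxDoc.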
@Override
public long cost() {
return maxDoc;
}
};
private final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
if (matches) {
return true;
}
if (topList == null) {
advanceAll(doc);
}
if (subScorers.top().doc != doc) {
assert subScorers.top().doc > doc;
return false;
}
setTopListAndFreq();
assert topList.doc == doc;
return matches = freq >= minMatch;
}
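// A rough upper bound on verification work: each sub-scorer may need to
// be advanced once to confirm the current doc.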
@Override
public float matchCost() {
return numScorers;
}
};
@Override
public DocIdSetIterator iterator() {
return TwoPhaseIterator.asDocIdSetIterator(twoPhase);
}
@Override
public TwoPhaseIterator twoPhaseIterator() {
return twoPhase;
}
private void advanceAll(int target) throws IOException {
DisiWrapper top = subScorers.top();
while (top.doc < target) {
top.doc = top.iterator.advance(target);
top = subScorers.updateTop();
}
}
private void setTopListAndFreq() {
topList = subScorers.topList();
freq = 0;
for (DisiWrapper w = topList; w != null; w = w.next) {
freq++;
}
}
private void setTopListAndFreqIfNecessary() throws IOException {
if (topList == null) {
advanceAll(doc);
setTopListAndFreq();
}
}
@Override
public int freq() throws IOException {
setTopListAndFreqIfNecessary();
return freq;
}
@Override
public float score() throws IOException {
// we need to know about all matches
setTopListAndFreqIfNecessary();
double score = 0;
for (DisiWrapper w = topList; w != null; w = w.next) {
score += w.scorer.score();
}
return (float) score;
}
@Override
public int docID() {
return doc;
}
}

View File

@@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestCoveringQuery extends LuceneTestCase {
public void testEquals() {
TermQuery tq1 = new TermQuery(new Term("foo", "bar"));
TermQuery tq2 = new TermQuery(new Term("foo", "quux"));
LongValuesSource vs = LongValuesSource.fromLongField("field");
CoveringQuery q1 = new CoveringQuery(Arrays.asList(tq1, tq2), vs);
CoveringQuery q2 = new CoveringQuery(Arrays.asList(tq1, tq2), vs);
QueryUtils.checkEqual(q1, q2);
// order does not matter
CoveringQuery q3 = new CoveringQuery(Arrays.asList(tq2, tq1), vs);
QueryUtils.checkEqual(q1, q3);
// values source matters
CoveringQuery q4 = new CoveringQuery(Arrays.asList(tq2, tq1), LongValuesSource.fromLongField("other_field"));
QueryUtils.checkUnequal(q1, q4);
// duplicates matter
CoveringQuery q5 = new CoveringQuery(Arrays.asList(tq1, tq1, tq2), vs);
CoveringQuery q6 = new CoveringQuery(Arrays.asList(tq1, tq2, tq2), vs);
QueryUtils.checkUnequal(q5, q6);
// query matters
CoveringQuery q7 = new CoveringQuery(Arrays.asList(tq1), vs);
CoveringQuery q8 = new CoveringQuery(Arrays.asList(tq2), vs);
QueryUtils.checkUnequal(q7, q8);
}
public void testRewrite() throws IOException {
PhraseQuery pq = new PhraseQuery("foo", "bar");
TermQuery tq = new TermQuery(new Term("foo", "bar"));
LongValuesSource vs = LongValuesSource.fromIntField("field");
assertEquals(
new CoveringQuery(Collections.singleton(tq), vs),
new CoveringQuery(Collections.singleton(pq), vs).rewrite(new MultiReader()));
}
public void testToString() {
TermQuery tq1 = new TermQuery(new Term("foo", "bar"));
TermQuery tq2 = new TermQuery(new Term("foo", "quux"));
LongValuesSource vs = LongValuesSource.fromIntField("field");
CoveringQuery q = new CoveringQuery(Arrays.asList(tq1, tq2), vs);
assertEquals("CoveringQuery(queries=[foo:bar, foo:quux], minimumNumberMatch=long(field))", q.toString());
assertEquals("CoveringQuery(queries=[bar, quux], minimumNumberMatch=long(field))", q.toString("foo"));
}
public void testRandom() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
int numDocs = atLeast(200);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (random().nextBoolean()) {
doc.add(new StringField("field", "A", Store.NO));
}
if (random().nextBoolean()) {
doc.add(new StringField("field", "B", Store.NO));
}
if (random().nextDouble() > 0.9) {
doc.add(new StringField("field", "C", Store.NO));
}
if (random().nextDouble() > 0.1) {
doc.add(new StringField("field", "D", Store.NO));
}
doc.add(new NumericDocValuesField("min_match", random().nextInt(6)));
w.addDocument(doc);
}
IndexReader r = DirectoryReader.open(w);
IndexSearcher searcher = new IndexSearcher(r);
w.close();
int iters = atLeast(10);
for (int iter = 0; iter < iters; ++iter) {
List<Query> queries = new ArrayList<>();
if (random().nextBoolean()) {
queries.add(new TermQuery(new Term("field", "A")));
}
if (random().nextBoolean()) {
queries.add(new TermQuery(new Term("field", "B")));
}
if (random().nextBoolean()) {
queries.add(new TermQuery(new Term("field", "C")));
}
if (random().nextBoolean()) {
queries.add(new TermQuery(new Term("field", "D")));
}
if (random().nextBoolean()) {
queries.add(new TermQuery(new Term("field", "E")));
}
Query q = new CoveringQuery(queries, LongValuesSource.fromLongField("min_match"));
QueryUtils.check(random(), q, searcher);
for (int i = 1; i < 4; ++i) {
BooleanQuery.Builder builder = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(i);
for (Query query : queries) {
builder.add(query, Occur.SHOULD);
}
Query q1 = builder.build();
Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i));
assertEquals(
searcher.count(q1),
searcher.count(q2));
}
Query filtered = new BooleanQuery.Builder()
.add(q, Occur.MUST)
.add(new TermQuery(new Term("field", "A")), Occur.MUST)
.build();
QueryUtils.check(random(), filtered, searcher);
}
r.close();
dir.close();
}
}

View File

@@ -296,6 +296,10 @@ Upgrading from Solr 6.x
* V2 APIs are now available at /api, in addition to /v2 (which is now deprecated). Legacy APIs continue to remain
available at /solr.
* Solr was tested and is compatible with the final release candidate of Java 9. All startup scripts
detect Java 9 correctly and set up Garbage Collector logging. If the configuration file contains
logging options that are no longer supported with Java 9, startup will fail.
New Features
----------------------
* SOLR-9857, SOLR-9858: Collect aggregated metrics from nodes and shard leaders in overseer. (ab)
@@ -473,6 +477,8 @@ Bug Fixes
* SOLR-11268: AtomicUpdateProcessor complains missing UpdateLog (noble, Ishan Chattopadhyaya)
* SOLR-8689: Fix bin/solr.cmd so it can run properly on Java 9 (Uwe Schindler, hossman)
Optimizations
----------------------

View File

@@ -1113,14 +1113,28 @@ IF "%GC_TUNE%"=="" (
-XX:-OmitStackTraceInFastThrow
)
IF "%GC_LOG_OPTS%"=="" ( if !JAVA_MAJOR_VERSION! GEQ 9 (
set GC_LOG_OPTS=-verbose:gc ^ IF NOT "%GC_LOG_OPTS%"=="" (
-XX:+PrintHeapAtGC ^ echo ERROR: On Java 9 you cannot set GC_LOG_OPTS, only default GC logging is available. Exiting
-XX:+PrintGCDetails ^ GOTO :eof
-XX:+PrintGCDateStamps ^ )
-XX:+PrintGCTimeStamps ^ set GC_LOG_OPTS="-Xlog:gc*:file=\"!SOLR_LOGS_DIR!\solr_gc.log\":time,uptime:filecount=9,filesize=20000"
-XX:+PrintTenuringDistribution ^ ) else (
-XX:+PrintGCApplicationStoppedTime IF "%GC_LOG_OPTS%"=="" (
rem Set defaults for Java 8
set GC_LOG_OPTS=-verbose:gc ^
-XX:+PrintHeapAtGC ^
-XX:+PrintGCDetails ^
-XX:+PrintGCDateStamps ^
-XX:+PrintGCTimeStamps ^
-XX:+PrintTenuringDistribution ^
-XX:+PrintGCApplicationStoppedTime
)
if "%JAVA_VENDOR%" == "IBM J9" (
set GC_LOG_OPTS=!GC_LOG_OPTS! "-Xverbosegclog:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M
) else (
set GC_LOG_OPTS=!GC_LOG_OPTS! "-Xloggc:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M
)
)
IF "%verbose%"=="1" (
@@ -1207,26 +1221,20 @@ IF NOT EXIST "%SOLR_SERVER_DIR%\tmp" (
mkdir "%SOLR_SERVER_DIR%\tmp"
)
IF "%JAVA_VENDOR%" == "IBM J9" (
set GCLOG_OPT="-Xverbosegclog:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M
) else (
set GCLOG_OPT="-Xloggc:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M
)
IF "%DEFAULT_CONFDIR%"=="" set "DEFAULT_CONFDIR=%SOLR_SERVER_DIR%\solr\configsets\_default\conf" IF "%DEFAULT_CONFDIR%"=="" set "DEFAULT_CONFDIR=%SOLR_SERVER_DIR%\solr\configsets\_default\conf"
IF "%FG%"=="1" ( IF "%FG%"=="1" (
REM run solr in the foreground REM run solr in the foreground
title "Solr-%SOLR_PORT%" title "Solr-%SOLR_PORT%"
echo %SOLR_PORT%>"%SOLR_TIP%"\bin\solr-%SOLR_PORT%.port echo %SOLR_PORT%>"%SOLR_TIP%"\bin\solr-%SOLR_PORT%.port
"%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT% ^ "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% ^
-Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^ -Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^
-Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" -Dsolr.default.confdir="%DEFAULT_CONFDIR%" ^ -Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" -Dsolr.default.confdir="%DEFAULT_CONFDIR%" ^
-Djetty.host=%SOLR_JETTY_HOST% -Djetty.port=%SOLR_PORT% -Djetty.home="%SOLR_SERVER_DIR%" ^ -Djetty.host=%SOLR_JETTY_HOST% -Djetty.port=%SOLR_PORT% -Djetty.home="%SOLR_SERVER_DIR%" ^
-Djava.io.tmpdir="%SOLR_SERVER_DIR%\tmp" -jar start.jar "%SOLR_JETTY_CONFIG%" "%SOLR_JETTY_ADDL_CONFIG%" -Djava.io.tmpdir="%SOLR_SERVER_DIR%\tmp" -jar start.jar "%SOLR_JETTY_CONFIG%" "%SOLR_JETTY_ADDL_CONFIG%"
) ELSE ( ) ELSE (
START /B "Solr-%SOLR_PORT%" /D "%SOLR_SERVER_DIR%" ^ START /B "Solr-%SOLR_PORT%" /D "%SOLR_SERVER_DIR%" ^
"%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT% ^ "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% ^
-Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^ -Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^
-Dsolr.log.muteconsole ^ -Dsolr.log.muteconsole ^
-Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" -Dsolr.default.confdir="%DEFAULT_CONFDIR%" ^ -Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" -Dsolr.default.confdir="%DEFAULT_CONFDIR%" ^
@@ -1843,8 +1851,12 @@ FOR /f "usebackq tokens=3" %%a IN (`^""%JAVA%" -version 2^>^&1 ^| findstr "versi
set JAVA_VERSION_INFO=!JAVA_VERSION_INFO:"=!
REM Extract the major Java version, e.g. 7, 8, 9, 10 ...
-for /f "tokens=2 delims=." %%a in ("!JAVA_VERSION_INFO!") do (
-  set JAVA_MAJOR_VERSION=%%a
+for /f "tokens=1,2 delims=." %%a in ("!JAVA_VERSION_INFO!") do (
+  if "%%a" GEQ "9" (
+    set JAVA_MAJOR_VERSION=%%a
+  ) else (
+    set JAVA_MAJOR_VERSION=%%b
+  )
)
REM Don't look for "_{build}" if we're on IBM J9.
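The version detection above follows one rule: modern version strings such as "9" or "9.0.1" carry the major version in the first dot-separated token, while legacy "1.8.0_144"-style strings carry it in the second. A standalone Java sketch of the same rule (illustrative only, not part of this commit):

// Returns the major Java version for both legacy ("1.8.0_144") and
// modern ("9", "9.0.1") version strings.
static int javaMajorVersion(String versionInfo) {
    String[] tokens = versionInfo.split("\\.");
    return "1".equals(tokens[0]) && tokens.length > 1
        ? Integer.parseInt(tokens[1].split("_")[0])   // "1.8.0_144" -> 8
        : Integer.parseInt(tokens[0]);                // "9.0.1" -> 9
}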

View File

@@ -27,7 +27,9 @@ REM set SOLR_JAVA_HOME=
REM Increase Java Min/Max Heap as needed to support your indexing / query needs
REM set SOLR_JAVA_MEM=-Xms512m -Xmx512m
-REM Enable verbose GC logging
+REM Configure verbose GC logging:
+REM For Java 8: if this is set, additional params will be added to specify the log file & rotation
+REM For Java 9 or higher: GC_LOG_OPTS is currently not supported. If you set it, the startup script will exit with failure.
REM set GC_LOG_OPTS=-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime
REM Various GC settings have shown to work well for a number of common Solr workloads.

View File

@@ -36,12 +36,12 @@
# Enable verbose GC logging...
# * If this is unset, various default options will be selected depending on which JVM version is in use
-# * For java8 or lower: if this is set, additional params will be added to specify the log file & rotation
-# * For java9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an output
-#   specifier, will have a 'file' output specifier (as well as formatting & rollover options) appended,
-#   using the effective value of the SOLR_LOGS_DIR.
+# * For Java 8: if this is set, additional params will be added to specify the log file & rotation
+# * For Java 9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an
+#   output specifier, will have a 'file' output specifier (as well as formatting & rollover options)
+#   appended, using the effective value of the SOLR_LOGS_DIR.
#
-#GC_LOG_OPTS='-Xlog:gc*' # (java9)
+#GC_LOG_OPTS='-Xlog:gc*' # (Java 9+)
#GC_LOG_OPTS="-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails \
#  -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime"