mirror of https://github.com/apache/lucene.git
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr
commit ebc3916b04
@ -15,6 +15,11 @@ Changes in Runtime Behavior

======================= Lucene 7.1.0 =======================

New Features

* LUCENE-7621: Add CoveringQuery, a query whose required number of matching
  clauses can be defined per document. (Adrien Grand)
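
  A minimal usage sketch (illustrative only, not part of the changelog; the
  field and term names are hypothetical): the per-document minimum number of
  matching clauses is read from a LongValuesSource, e.g. a numeric doc-values
  field, mirroring the new TestCoveringQuery below.

    List<Query> clauses = Arrays.asList(
        new TermQuery(new Term("body", "lucene")),
        new TermQuery(new Term("body", "search")));
    // "min_match" is a hypothetical per-document numeric doc-values field
    // that says how many of the clauses must match
    Query q = new CoveringQuery(clauses, LongValuesSource.fromLongField("min_match"));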

Optimizations

* LUCENE-7905: Optimize how OrdinalMap (used by

@ -28,6 +33,9 @@ Optimizations

  than 8x greater than the cost of the lead iterator in order to use doc values.
  (Murali Krishna P via Adrien Grand)
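
  For context, a hedged sketch of the kind of query this heuristic applies to
  (not from this commit; it assumes the truncated entry refers to
  IndexOrDocValuesQuery and the Lucene 7 doc-values range query API, and
  "price" is a hypothetical field): the query wraps an index-structure view
  and a doc-values view of the same predicate, and the relative cost of the
  two decides which one is executed.

    // hypothetical "price" field indexed both as a point and as doc values
    Query indexQuery = LongPoint.newRangeQuery("price", 10L, 20L);
    Query dvQuery = NumericDocValuesField.newSlowRangeQuery("price", 10L, 20L);
    Query q = new IndexOrDocValuesQuery(indexQuery, dvQuery);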

* LUCENE-7925: Collapse duplicate SHOULD or MUST clauses by summing up their
  boosts. (Adrien Grand)

Bug Fixes

* LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used

@ -35,6 +43,12 @@ Bug Fixes

  not recommended, lucene-analyzers-icu contains binary data structures
  specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir)

Build

* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first
  instead of locally installing first, to work around a double repository push of
  *-sources.jar and *-javadoc.jar files. (Lynn Monson via Steve Rowe)

======================= Lucene 7.0.0 =======================

New Features

@ -559,10 +559,6 @@

        <artifact:install-provider artifactId="wagon-ssh" version="1.0-beta-7"/>
        <parent-poms/>
        <artifact:pom id="maven.project" file="@{pom.xml}"/>
-       <artifact:install file="@{jar.file}">
-         <artifact-attachments/>
-         <pom refid="maven.project"/>
-       </artifact:install>
        <artifact:deploy file="@{jar.file}">
          <artifact-attachments/>
          <remoteRepository id="${m2.repository.id}" url="${m2.repository.url}">

@ -570,6 +566,10 @@

          </remoteRepository>
          <pom refid="maven.project"/>
        </artifact:deploy>
+       <artifact:install file="@{jar.file}">
+         <artifact-attachments/>
+         <pom refid="maven.project"/>
+       </artifact:install>
      </sequential>
    </macrodef>

@ -23,6 +23,7 @@ import java.util.Arrays;

import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

@ -332,6 +333,69 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {

      }
    }

    // Deduplicate SHOULD clauses by summing up their boosts
    if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) {
      Map<Query, Double> shouldClauses = new HashMap<>();
      for (Query query : clauseSets.get(Occur.SHOULD)) {
        double boost = 1;
        while (query instanceof BoostQuery) {
          BoostQuery bq = (BoostQuery) query;
          boost *= bq.getBoost();
          query = bq.getQuery();
        }
        shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost);
      }
      if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder()
            .setMinimumNumberShouldMatch(minimumNumberShouldMatch);
        for (Map.Entry<Query,Double> entry : shouldClauses.entrySet()) {
          Query query = entry.getKey();
          float boost = entry.getValue().floatValue();
          if (boost != 1f) {
            query = new BoostQuery(query, boost);
          }
          builder.add(query, Occur.SHOULD);
        }
        for (BooleanClause clause : clauses) {
          if (clause.getOccur() != Occur.SHOULD) {
            builder.add(clause);
          }
        }
        return builder.build();
      }
    }
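
    // Commentary (added for clarity, not in the original source): summing
    // boosts is score-preserving because BooleanQuery sums the scores of its
    // matching clauses and BoostQuery multiplies a clause's score by its
    // boost, so N identical SHOULD clauses score exactly like one clause
    // boosted by N. The rewrite is skipped when minimumNumberShouldMatch is
    // greater than 1, since merging duplicates would change how many distinct
    // clauses can match.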

    // Deduplicate MUST clauses by summing up their boosts
    if (clauseSets.get(Occur.MUST).size() > 0) {
      Map<Query, Double> mustClauses = new HashMap<>();
      for (Query query : clauseSets.get(Occur.MUST)) {
        double boost = 1;
        while (query instanceof BoostQuery) {
          BoostQuery bq = (BoostQuery) query;
          boost *= bq.getBoost();
          query = bq.getQuery();
        }
        mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost);
      }
      if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder()
            .setMinimumNumberShouldMatch(minimumNumberShouldMatch);
        for (Map.Entry<Query,Double> entry : mustClauses.entrySet()) {
          Query query = entry.getKey();
          float boost = entry.getValue().floatValue();
          if (boost != 1f) {
            query = new BoostQuery(query, boost);
          }
          builder.add(query, Occur.MUST);
        }
        for (BooleanClause clause : clauses) {
          if (clause.getOccur() != Occur.MUST) {
            builder.add(clause);
          }
        }
        return builder.build();
      }
    }

    // Rewrite queries whose single scoring clause is a MUST clause on a
    // MatchAllDocsQuery to a ConstantScoreQuery

@ -427,4 +427,57 @@ public class TestBooleanRewrites extends LuceneTestCase {

      assertEquals(expectedScore, actualScore, expectedScore / 100); // error under 1%
    }
  }

  public void testDeduplicateShouldClauses() throws IOException {
    IndexSearcher searcher = newSearcher(new MultiReader());

    Query query = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
        .build();
    Query expected = new BoostQuery(new TermQuery(new Term("foo", "bar")), 2);
    assertEquals(expected, searcher.rewrite(query));

    query = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
        .add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 2), Occur.SHOULD)
        .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
        .build();
    expected = new BooleanQuery.Builder()
        .add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 3), Occur.SHOULD)
        .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
        .build();
    assertEquals(expected, searcher.rewrite(query));

    query = new BooleanQuery.Builder()
        .setMinimumNumberShouldMatch(2)
        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
        .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
        .build();
    expected = query;
    assertEquals(expected, searcher.rewrite(query));
  }

  public void testDeduplicateMustClauses() throws IOException {
    IndexSearcher searcher = newSearcher(new MultiReader());

    Query query = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
        .add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
        .build();
    Query expected = new BoostQuery(new TermQuery(new Term("foo", "bar")), 2);
    assertEquals(expected, searcher.rewrite(query));

    query = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
        .add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 2), Occur.MUST)
        .add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
        .build();
    expected = new BooleanQuery.Builder()
        .add(new BoostQuery(new TermQuery(new Term("foo", "bar")), 3), Occur.MUST)
        .add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
        .build();
    assertEquals(expected, searcher.rewrite(query));
  }
}

@ -90,7 +90,8 @@ public class TestConstantScoreQuery extends LuceneTestCase {

    RandomIndexWriter writer = new RandomIndexWriter (random(), directory);

    Document doc = new Document();
-   doc.add(newStringField("field", "term", Field.Store.NO));
+   doc.add(newStringField("field", "term1", Field.Store.NO));
+   doc.add(newStringField("field", "term2", Field.Store.NO));
    writer.addDocument(doc);

    reader = writer.getReader();

@ -99,8 +100,8 @@ public class TestConstantScoreQuery extends LuceneTestCase {

    searcher = newSearcher(reader, true, false);
    searcher.setQueryCache(null); // to assert on scorer impl

-   final BoostQuery csq1 = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term"))), 2f);
-   final BoostQuery csq2 = new BoostQuery(new ConstantScoreQuery(csq1), 5f);
+   final BoostQuery csq1 = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term1"))), 2f);
+   final BoostQuery csq2 = new BoostQuery(new ConstantScoreQuery(new ConstantScoreQuery(new TermQuery(new Term ("field", "term2")))), 5f);

    final BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(csq1, BooleanClause.Occur.SHOULD);

@ -0,0 +1,180 @@

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;

/** A {@link Query} that allows a configurable number of required matches per
 *  document. This is typically useful in order to build queries whose query
 *  terms must all appear in documents.
 *  @lucene.experimental
 */
public final class CoveringQuery extends Query {

  private final Collection<Query> queries;
  private final LongValuesSource minimumNumberMatch;
  private final int hashCode;

  /**
   * Sole constructor.
   * @param queries Sub queries to match.
   * @param minimumNumberMatch Per-document long value that records how many queries
   *                           should match. Values that are less than 1 are treated
   *                           like <tt>1</tt>: only documents that have at least one
   *                           matching clause will be considered matches. Documents
   *                           that do not have a value for <tt>minimumNumberMatch</tt>
   *                           do not match.
   */
  public CoveringQuery(Collection<Query> queries, LongValuesSource minimumNumberMatch) {
    if (queries.size() > BooleanQuery.getMaxClauseCount()) {
      throw new BooleanQuery.TooManyClauses();
    }
    if (minimumNumberMatch.needsScores()) {
      throw new IllegalArgumentException("The minimum number of matches may not depend on the score.");
    }
    this.queries = new Multiset<>();
    this.queries.addAll(queries);
    this.minimumNumberMatch = Objects.requireNonNull(minimumNumberMatch);
    this.hashCode = computeHashCode();
  }
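
  // Commentary (added for clarity, not in the original source): storing the
  // clauses in a Multiset makes equals()/hashCode() insensitive to clause
  // order but sensitive to duplicates, which is exactly what testEquals in
  // TestCoveringQuery asserts below.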

  @Override
  public String toString(String field) {
    String queriesToString = queries.stream()
        .map(q -> q.toString(field))
        .sorted()
        .collect(Collectors.joining(", "));
    return "CoveringQuery(queries=[" + queriesToString + "], minimumNumberMatch=" + minimumNumberMatch + ")";
  }

  @Override
  public boolean equals(Object obj) {
    if (sameClassAs(obj) == false) {
      return false;
    }
    CoveringQuery that = (CoveringQuery) obj;
    return hashCode == that.hashCode // not necessary but makes equals faster
        && Objects.equals(queries, that.queries)
        && Objects.equals(minimumNumberMatch, that.minimumNumberMatch);
  }

  private int computeHashCode() {
    int h = classHash();
    h = 31 * h + queries.hashCode();
    h = 31 * h + minimumNumberMatch.hashCode();
    return h;
  }

  @Override
  public int hashCode() {
    return hashCode;
  }

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    Multiset<Query> rewritten = new Multiset<>();
    boolean actuallyRewritten = false;
    for (Query query : queries) {
      Query r = query.rewrite(reader);
      rewritten.add(r);
      actuallyRewritten |= query != r;
    }
    if (actuallyRewritten) {
      return new CoveringQuery(rewritten, minimumNumberMatch);
    }
    return super.rewrite(reader);
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    final List<Weight> weights = new ArrayList<>(queries.size());
    for (Query query : queries) {
      weights.add(searcher.createWeight(query, needsScores, boost));
    }
    return new CoveringWeight(this, weights, minimumNumberMatch);
  }
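
  // Commentary (added for clarity, not in the original source): the boost is
  // propagated to every sub-weight, so the scorer can simply sum sub-scores;
  // the sum then scales linearly with the boost without any extra multiplier.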

  private static class CoveringWeight extends Weight {

    private final Collection<Weight> weights;
    private final LongValuesSource minimumNumberMatch;

    CoveringWeight(Query query, Collection<Weight> weights, LongValuesSource minimumNumberMatch) {
      super(query);
      this.weights = weights;
      this.minimumNumberMatch = minimumNumberMatch;
    }

    @Override
    public void extractTerms(Set<Term> terms) {
      for (Weight weight : weights) {
        weight.extractTerms(terms);
      }
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      LongValues minMatchValues = minimumNumberMatch.getValues(context, null);
      if (minMatchValues.advanceExact(doc) == false) {
        return Explanation.noMatch("minimumNumberMatch has no value on the current document");
      }
      final long minimumNumberMatch = Math.max(1, minMatchValues.longValue());
      int freq = 0;
      double score = 0;
      List<Explanation> subExpls = new ArrayList<>();
      for (Weight weight : weights) {
        Explanation subExpl = weight.explain(context, doc);
        if (subExpl.isMatch()) {
          freq++;
          score += subExpl.getValue();
        }
        subExpls.add(subExpl);
      }
      if (freq >= minimumNumberMatch) {
        return Explanation.match((float) score, freq + " matches for " + minimumNumberMatch + " required matches, sum of:", subExpls);
      } else {
        return Explanation.noMatch(freq + " matches for " + minimumNumberMatch + " required matches", subExpls);
      }
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      Collection<Scorer> scorers = new ArrayList<>();
      for (Weight w : weights) {
        Scorer s = w.scorer(context);
        if (s != null) {
          scorers.add(s);
        }
      }
      if (scorers.isEmpty()) {
        return null;
      }
      return new CoveringScorer(this, scorers, minimumNumberMatch.getValues(context, null), context.reader().maxDoc());
    }
  }

}

@ -0,0 +1,225 @@

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/** A {@link Scorer} whose number of matches is per-document. */
final class CoveringScorer extends Scorer {

  final int numScorers;
  final int maxDoc;
  final LongValues minMatchValues;

  boolean matches; // if true then the doc matches, otherwise we don't know and need to check
  int doc;  // current doc ID
  DisiWrapper topList; // list of matches
  int freq; // number of scorers on the desired doc ID
  long minMatch; // current required number of matches

  // priority queue that stores all scorers
  final DisiPriorityQueue subScorers;

  final long cost;

  CoveringScorer(Weight weight, Collection<Scorer> scorers, LongValues minMatchValues, int maxDoc) {
    super(weight);

    this.numScorers = scorers.size();
    this.maxDoc = maxDoc;
    this.minMatchValues = minMatchValues;
    this.doc = -1;

    subScorers = new DisiPriorityQueue(scorers.size());

    for (Scorer scorer : scorers) {
      subScorers.add(new DisiWrapper(scorer));
    }

    this.cost = scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost).sum();
  }

  @Override
  public final Collection<ChildScorer> getChildren() throws IOException {
    List<ChildScorer> matchingChildren = new ArrayList<>();
    setTopListAndFreqIfNecessary();
    for (DisiWrapper s = topList; s != null; s = s.next) {
      matchingChildren.add(new ChildScorer(s.scorer, "SHOULD"));
    }
    return matchingChildren;
  }

  private final DocIdSetIterator approximation = new DocIdSetIterator() {

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      return advance(docID() + 1);
    }

    @Override
    public int advance(int target) throws IOException {
      // reset state
      matches = false;
      topList = null;

      doc = target;
      setMinMatch();

      DisiWrapper top = subScorers.top();
      int numMatches = 0;
      int maxPotentialMatches = numScorers;
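      // Commentary (added for clarity, not in the original source):
      // maxPotentialMatches counts the sub-scorers that could still be on
      // `target`; each scorer observed past `target` decrements it. Once it
      // drops below minMatch, no doc at `target` can reach the required
      // number of matches, so the check below gives up on `target` early
      // instead of advancing the remaining scorers.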
      while (top.doc < target) {
        if (maxPotentialMatches < minMatch) {
          // No need to keep trying to advance to `target` since no match is possible.
          if (target >= maxDoc - 1) {
            doc = NO_MORE_DOCS;
          } else {
            doc = target + 1;
          }
          setMinMatch();
          return doc;
        }
        top.doc = top.iterator.advance(target);
        boolean match = top.doc == target;
        top = subScorers.updateTop();
        if (match) {
          numMatches++;
          if (numMatches >= minMatch) {
            // success, no need to check other iterators
            matches = true;
            return doc;
          }
        } else {
          maxPotentialMatches--;
        }
      }

      doc = top.doc;
      setMinMatch();
      return doc;
    }

    private void setMinMatch() throws IOException {
      if (doc >= maxDoc) {
        // advanceExact may not be called on out-of-range doc ids
        minMatch = 1;
      } else if (minMatchValues.advanceExact(doc)) {
        // values < 1 are treated as 1: we require at least one match
        minMatch = Math.max(1, minMatchValues.longValue());
      } else {
        // this will make sure the document does not match
        minMatch = Long.MAX_VALUE;
      }
    }

    @Override
    public long cost() {
      return maxDoc;
    }

  };
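
  // Commentary (added for clarity, not in the original source): advance()
  // above is only an approximation; it may stop on a candidate doc before all
  // sub-scorers have been advanced. matches() below completes the check: it
  // advances the remaining scorers to the candidate, counts how many landed
  // on it (freq) and compares that count against the per-document minimum.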

  private final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {

    @Override
    public boolean matches() throws IOException {
      if (matches) {
        return true;
      }
      if (topList == null) {
        advanceAll(doc);
      }
      if (subScorers.top().doc != doc) {
        assert subScorers.top().doc > doc;
        return false;
      }
      setTopListAndFreq();
      assert topList.doc == doc;
      return matches = freq >= minMatch;
    }

    @Override
    public float matchCost() {
      return numScorers;
    }

  };

  @Override
  public DocIdSetIterator iterator() {
    return TwoPhaseIterator.asDocIdSetIterator(twoPhase);
  }

  @Override
  public TwoPhaseIterator twoPhaseIterator() {
    return twoPhase;
  }

  private void advanceAll(int target) throws IOException {
    DisiWrapper top = subScorers.top();
    while (top.doc < target) {
      top.doc = top.iterator.advance(target);
      top = subScorers.updateTop();
    }
  }

  private void setTopListAndFreq() {
    topList = subScorers.topList();
    freq = 0;
    for (DisiWrapper w = topList; w != null; w = w.next) {
      freq++;
    }
  }

  private void setTopListAndFreqIfNecessary() throws IOException {
    if (topList == null) {
      advanceAll(doc);
      setTopListAndFreq();
    }
  }

  @Override
  public int freq() throws IOException {
    setTopListAndFreqIfNecessary();
    return freq;
  }

  @Override
  public float score() throws IOException {
    // we need to know about all matches
    setTopListAndFreqIfNecessary();
    double score = 0;
    for (DisiWrapper w = topList; w != null; w = w.next) {
      score += w.scorer.score();
    }
    return (float) score;
  }

  @Override
  public int docID() {
    return doc;
  }

}

@ -0,0 +1,157 @@

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestCoveringQuery extends LuceneTestCase {

  public void testEquals() {
    TermQuery tq1 = new TermQuery(new Term("foo", "bar"));
    TermQuery tq2 = new TermQuery(new Term("foo", "quux"));
    LongValuesSource vs = LongValuesSource.fromLongField("field");

    CoveringQuery q1 = new CoveringQuery(Arrays.asList(tq1, tq2), vs);
    CoveringQuery q2 = new CoveringQuery(Arrays.asList(tq1, tq2), vs);
    QueryUtils.checkEqual(q1, q2);

    // order does not matter
    CoveringQuery q3 = new CoveringQuery(Arrays.asList(tq2, tq1), vs);
    QueryUtils.checkEqual(q1, q3);

    // values source matters
    CoveringQuery q4 = new CoveringQuery(Arrays.asList(tq2, tq1), LongValuesSource.fromLongField("other_field"));
    QueryUtils.checkUnequal(q1, q4);

    // duplicates matter
    CoveringQuery q5 = new CoveringQuery(Arrays.asList(tq1, tq1, tq2), vs);
    CoveringQuery q6 = new CoveringQuery(Arrays.asList(tq1, tq2, tq2), vs);
    QueryUtils.checkUnequal(q5, q6);

    // query matters
    CoveringQuery q7 = new CoveringQuery(Arrays.asList(tq1), vs);
    CoveringQuery q8 = new CoveringQuery(Arrays.asList(tq2), vs);
    QueryUtils.checkUnequal(q7, q8);
  }

  public void testRewrite() throws IOException {
    PhraseQuery pq = new PhraseQuery("foo", "bar");
    TermQuery tq = new TermQuery(new Term("foo", "bar"));
    LongValuesSource vs = LongValuesSource.fromIntField("field");
    assertEquals(
        new CoveringQuery(Collections.singleton(tq), vs),
        new CoveringQuery(Collections.singleton(pq), vs).rewrite(new MultiReader()));
  }

  public void testToString() {
    TermQuery tq1 = new TermQuery(new Term("foo", "bar"));
    TermQuery tq2 = new TermQuery(new Term("foo", "quux"));
    LongValuesSource vs = LongValuesSource.fromIntField("field");
    CoveringQuery q = new CoveringQuery(Arrays.asList(tq1, tq2), vs);
    assertEquals("CoveringQuery(queries=[foo:bar, foo:quux], minimumNumberMatch=long(field))", q.toString());
    assertEquals("CoveringQuery(queries=[bar, quux], minimumNumberMatch=long(field))", q.toString("foo"));
  }

  public void testRandom() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    int numDocs = atLeast(200);
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      if (random().nextBoolean()) {
        doc.add(new StringField("field", "A", Store.NO));
      }
      if (random().nextBoolean()) {
        doc.add(new StringField("field", "B", Store.NO));
      }
      if (random().nextDouble() > 0.9) {
        doc.add(new StringField("field", "C", Store.NO));
      }
      if (random().nextDouble() > 0.1) {
        doc.add(new StringField("field", "D", Store.NO));
      }
      doc.add(new NumericDocValuesField("min_match", random().nextInt(6)));
      w.addDocument(doc);
    }

    IndexReader r = DirectoryReader.open(w);
    IndexSearcher searcher = new IndexSearcher(r);
    w.close();

    int iters = atLeast(10);
    for (int iter = 0; iter < iters; ++iter) {
      List<Query> queries = new ArrayList<>();
      if (random().nextBoolean()) {
        queries.add(new TermQuery(new Term("field", "A")));
      }
      if (random().nextBoolean()) {
        queries.add(new TermQuery(new Term("field", "B")));
      }
      if (random().nextBoolean()) {
        queries.add(new TermQuery(new Term("field", "C")));
      }
      if (random().nextBoolean()) {
        queries.add(new TermQuery(new Term("field", "D")));
      }
      if (random().nextBoolean()) {
        queries.add(new TermQuery(new Term("field", "E")));
      }

      Query q = new CoveringQuery(queries, LongValuesSource.fromLongField("min_match"));
      QueryUtils.check(random(), q, searcher);

      for (int i = 1; i < 4; ++i) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder()
            .setMinimumNumberShouldMatch(i);
        for (Query query : queries) {
          builder.add(query, Occur.SHOULD);
        }
        Query q1 = builder.build();
        Query q2 = new CoveringQuery(queries, LongValuesSource.constant(i));
        assertEquals(
            searcher.count(q1),
            searcher.count(q2));
      }

      Query filtered = new BooleanQuery.Builder()
          .add(q, Occur.MUST)
          .add(new TermQuery(new Term("field", "A")), Occur.MUST)
          .build();
      QueryUtils.check(random(), filtered, searcher);
    }

    r.close();
    dir.close();
  }
}

@ -296,6 +296,10 @@ Upgrading from Solr 6.x

* V2 APIs are now available at /api, in addition to /v2 (which is now deprecated). Legacy APIs remain
  available at /solr.

* Solr was tested and is compatible with the final release candidate of Java 9. All startup scripts
  detect Java 9 correctly and set up Garbage Collector logging. If the configuration file contains
  logging options that are no longer supported with Java 9, startup will fail.

New Features
----------------------
* SOLR-9857, SOLR-9858: Collect aggregated metrics from nodes and shard leaders in overseer. (ab)

@ -473,6 +477,8 @@ Bug Fixes

* SOLR-11268: AtomicUpdateProcessor complains about a missing UpdateLog (noble, Ishan Chattopadhyaya)

* SOLR-8689: Fix bin/solr.cmd so it can run properly on Java 9 (Uwe Schindler, hossman)

Optimizations
----------------------

@ -1113,14 +1113,28 @@ IF "%GC_TUNE%"=="" (

  -XX:-OmitStackTraceInFastThrow
)

-IF "%GC_LOG_OPTS%"=="" (
-  set GC_LOG_OPTS=-verbose:gc ^
-    -XX:+PrintHeapAtGC ^
-    -XX:+PrintGCDetails ^
-    -XX:+PrintGCDateStamps ^
-    -XX:+PrintGCTimeStamps ^
-    -XX:+PrintTenuringDistribution ^
-    -XX:+PrintGCApplicationStoppedTime
-)
+if !JAVA_MAJOR_VERSION! GEQ 9 (
+  IF NOT "%GC_LOG_OPTS%"=="" (
+    echo ERROR: On Java 9 you cannot set GC_LOG_OPTS, only default GC logging is available. Exiting
+    GOTO :eof
+  )
+  set GC_LOG_OPTS="-Xlog:gc*:file=\"!SOLR_LOGS_DIR!\solr_gc.log\":time,uptime:filecount=9,filesize=20000"
+) else (
+  IF "%GC_LOG_OPTS%"=="" (
+    rem Set defaults for Java 8
+    set GC_LOG_OPTS=-verbose:gc ^
+      -XX:+PrintHeapAtGC ^
+      -XX:+PrintGCDetails ^
+      -XX:+PrintGCDateStamps ^
+      -XX:+PrintGCTimeStamps ^
+      -XX:+PrintTenuringDistribution ^
+      -XX:+PrintGCApplicationStoppedTime
+  )
+  if "%JAVA_VENDOR%" == "IBM J9" (
+    set GC_LOG_OPTS=!GC_LOG_OPTS! "-Xverbosegclog:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M
+  ) else (
+    set GC_LOG_OPTS=!GC_LOG_OPTS! "-Xloggc:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M
+  )
+)

IF "%verbose%"=="1" (

@ -1207,26 +1221,20 @@ IF NOT EXIST "%SOLR_SERVER_DIR%\tmp" (

  mkdir "%SOLR_SERVER_DIR%\tmp"
)

-IF "%JAVA_VENDOR%" == "IBM J9" (
-  set GCLOG_OPT="-Xverbosegclog:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M
-) else (
-  set GCLOG_OPT="-Xloggc:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M
-)
-
IF "%DEFAULT_CONFDIR%"=="" set "DEFAULT_CONFDIR=%SOLR_SERVER_DIR%\solr\configsets\_default\conf"

IF "%FG%"=="1" (
  REM run solr in the foreground
  title "Solr-%SOLR_PORT%"
  echo %SOLR_PORT%>"%SOLR_TIP%"\bin\solr-%SOLR_PORT%.port
-  "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT% ^
+  "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% ^
    -Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^
    -Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" -Dsolr.default.confdir="%DEFAULT_CONFDIR%" ^
    -Djetty.host=%SOLR_JETTY_HOST% -Djetty.port=%SOLR_PORT% -Djetty.home="%SOLR_SERVER_DIR%" ^
    -Djava.io.tmpdir="%SOLR_SERVER_DIR%\tmp" -jar start.jar "%SOLR_JETTY_CONFIG%" "%SOLR_JETTY_ADDL_CONFIG%"
) ELSE (
  START /B "Solr-%SOLR_PORT%" /D "%SOLR_SERVER_DIR%" ^
-    "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT% ^
+    "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% ^
    -Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^
    -Dsolr.log.muteconsole ^
    -Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" -Dsolr.default.confdir="%DEFAULT_CONFDIR%" ^

@ -1843,8 +1851,12 @@ FOR /f "usebackq tokens=3" %%a IN (`^""%JAVA%" -version 2^>^&1 ^| findstr "versi

set JAVA_VERSION_INFO=!JAVA_VERSION_INFO:"=!

REM Extract the major Java version, e.g. 7, 8, 9, 10 ...
-for /f "tokens=2 delims=." %%a in ("!JAVA_VERSION_INFO!") do (
-  set JAVA_MAJOR_VERSION=%%a
+for /f "tokens=1,2 delims=." %%a in ("!JAVA_VERSION_INFO!") do (
+  if "%%a" GEQ "9" (
+    set JAVA_MAJOR_VERSION=%%a
+  ) else (
+    set JAVA_MAJOR_VERSION=%%b
+  )
)
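REM A note added for clarity (not part of this commit): the two-token parse
REM above handles both version schemes. For a Java 9 style string such as
REM "9.0.1" the first dot-separated token is already the major version, while
REM for the legacy "1.8.0_144" scheme the major version is the second token.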

REM Don't look for "_{build}" if we're on IBM J9.

@ -27,7 +27,9 @@ REM set SOLR_JAVA_HOME=

REM Increase Java Min/Max Heap as needed to support your indexing / query needs
REM set SOLR_JAVA_MEM=-Xms512m -Xmx512m

-REM Enable verbose GC logging
+REM Configure verbose GC logging:
+REM For Java 8: if this is set, additional params will be added to specify the log file & rotation
+REM For Java 9 or higher: GC_LOG_OPTS is currently not supported. If you set it, the startup script will exit with failure.
REM set GC_LOG_OPTS=-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime

REM Various GC settings have been shown to work well for a number of common Solr workloads.

@ -36,12 +36,12 @@

# Enable verbose GC logging...
#  * If this is unset, various default options will be selected depending on which JVM version is in use
-#  * For java8 or lower: if this is set, additional params will be added to specify the log file & rotation
-#  * For java9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an output
-#    specifier, will have a 'file' output specifier (as well as formatting & rollover options) appended,
-#    using the effective value of the SOLR_LOGS_DIR.
+#  * For Java 8: if this is set, additional params will be added to specify the log file & rotation
+#  * For Java 9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an
+#    output specifier, will have a 'file' output specifier (as well as formatting & rollover options)
+#    appended, using the effective value of the SOLR_LOGS_DIR.
#
-#GC_LOG_OPTS='-Xlog:gc*'  # (java9)
+#GC_LOG_OPTS='-Xlog:gc*'  # (Java 9+)
#GC_LOG_OPTS="-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails \
#  -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime"