Mirror of https://github.com/apache/lucene.git

LUCENE-8312: Leverage impacts to speed up SynonymQuery.

commit 0a1de2c4a5 (parent 24d79de796)
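Context for this change (editorial note, not part of the commit): impacts are the per-block
(term frequency, norm) pairs that Lucene stores alongside postings. From them a scorer can derive an
upper bound on the score of any document in a block and skip whole blocks that cannot make the
current top-k. Before this commit only TermQuery's scorer actually skipped on impacts; this commit
extracts the skipping logic into a reusable ImpactsDISI and teaches SynonymQuery (and FeatureQuery)
to use it. A minimal sketch of a search that benefits, mirroring the API used by the tests below
(field and term names are made up for illustration):

    Query query = new SynonymQuery(new Term("body", "car"), new Term("body", "auto"));
    // trackTotalHits == false lets the searcher run in ScoreMode.TOP_SCORES and skip blocks
    TopScoreDocCollector collector = TopScoreDocCollector.create(10, null, false);
    searcher.search(query, collector);
    TopDocs hits = collector.topDocs();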
lucene/core/src/java/org/apache/lucene/document/FeatureQuery.java

@@ -30,8 +30,8 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.ImpactsDISI;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MaxScoreCache;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;

@@ -133,9 +133,9 @@ final class FeatureQuery extends Query {
           return null;
         }
 
-        SimScorer scorer = function.scorer(boost);
-        ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
-        MaxScoreCache maxScoreCache = new MaxScoreCache(impacts, scorer);
+        final SimScorer scorer = function.scorer(boost);
+        final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
+        final ImpactsDISI impactsDisi = new ImpactsDISI(impacts, impacts, scorer);
 
         return new Scorer(this) {
 
@@ -151,19 +151,23 @@ final class FeatureQuery extends Query {
 
           @Override
           public DocIdSetIterator iterator() {
-            return impacts;
+            return impactsDisi;
           }
 
           @Override
           public int advanceShallow(int target) throws IOException {
-            return maxScoreCache.advanceShallow(target);
+            return impactsDisi.advanceShallow(target);
           }
 
           @Override
           public float getMaxScore(int upTo) throws IOException {
-            return maxScoreCache.getMaxScore(upTo);
+            return impactsDisi.getMaxScore(upTo);
           }
+
+          @Override
+          public void setMinCompetitiveScore(float minScore) {
+            impactsDisi.setMinCompetitiveScore(minScore);
+          }
         };
       }
 
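The pattern above repeats for every scorer this commit touches: instead of each scorer
re-implementing block skipping against a MaxScoreCache, it wraps its ImpactsEnum in an ImpactsDISI
and forwards the Scorer hooks to it. A hedged sketch of the common wiring, with names as in the diff:

    ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);     // iterator + source of impacts
    ImpactsDISI disi = new ImpactsDISI(impacts, impacts, simScorer); // same object plays both roles
    // the enclosing Scorer then forwards iterator(), advanceShallow(), getMaxScore()
    // and setMinCompetitiveScore() to disi, which skips non-competitive blocks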
lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java

@@ -16,41 +16,14 @@
  */
 package org.apache.lucene.index;
 
 import java.io.IOException;
 
 /**
  * Extension of {@link PostingsEnum} which also provides information about
  * upcoming impacts.
  * @lucene.experimental
  */
-public abstract class ImpactsEnum extends PostingsEnum {
+public abstract class ImpactsEnum extends PostingsEnum implements ImpactsSource {
 
   /** Sole constructor. */
   protected ImpactsEnum() {}
 
-  /**
-   * Shallow-advance to {@code target}. This is cheaper than calling
-   * {@link #advance(int)} and allows further calls to {@link #getImpacts()}
-   * to ignore doc IDs that are less than {@code target} in order to get more
-   * precise information about impacts.
-   * This method may not be called on targets that are less than the current
-   * {@link #docID()}.
-   * After this method has been called, {@link #nextDoc()} may not be called
-   * if the current doc ID is less than {@code target - 1} and
-   * {@link #advance(int)} may not be called on targets that are less than
-   * {@code target}.
-   */
-  public abstract void advanceShallow(int target) throws IOException;
-
-  /**
-   * Get information about upcoming impacts for doc ids that are greater than
-   * or equal to the maximum of {@link #docID()} and the last target that was
-   * passed to {@link #advanceShallow(int)}.
-   * This method may not be called on an unpositioned iterator on which
-   * {@link #advanceShallow(int)} has never been called.
-   * NOTE: advancing this iterator may invalidate the returned impacts, so they
-   * should not be used after the iterator has been advanced.
-   */
-  public abstract Impacts getImpacts() throws IOException;
-
 }
lucene/core/src/java/org/apache/lucene/index/ImpactsSource.java (new file)

@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * Source of {@link Impacts}.
+ * @lucene.internal
+ */
+public interface ImpactsSource {
+
+  /**
+   * Shallow-advance to {@code target}. This is cheaper than calling
+   * {@link DocIdSetIterator#advance(int)} and allows further calls to
+   * {@link #getImpacts()} to ignore doc IDs that are less than {@code target}
+   * in order to get more precise information about impacts.
+   * This method may not be called on targets that are less than the current
+   * {@link DocIdSetIterator#docID()}.
+   * After this method has been called, {@link DocIdSetIterator#nextDoc()} may
+   * not be called if the current doc ID is less than {@code target - 1} and
+   * {@link DocIdSetIterator#advance(int)} may not be called on targets that
+   * are less than {@code target}.
+   */
+  void advanceShallow(int target) throws IOException;
+
+  /**
+   * Get information about upcoming impacts for doc ids that are greater than
+   * or equal to the maximum of {@link DocIdSetIterator#docID()} and the last
+   * target that was passed to {@link #advanceShallow(int)}.
+   * This method may not be called on an unpositioned iterator on which
+   * {@link #advanceShallow(int)} has never been called.
+   * NOTE: advancing this iterator may invalidate the returned impacts, so they
+   * should not be used after the iterator has been advanced.
+   */
+  Impacts getImpacts() throws IOException;
+
+}
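The javadoc above pins down a calling protocol: shallow-advance first, then read impacts, and only
then decide how far to really advance. A sketch of a conforming consumer (assuming a positioned
ImpactsSource named source and a target doc ID):

    source.advanceShallow(target);            // cheap: no postings are decoded
    Impacts impacts = source.getImpacts();    // only valid until the iterator advances
    int blockUpTo = impacts.getDocIdUpTo(0);  // last doc ID covered by the most precise level
    for (Impact impact : impacts.getImpacts(0)) {
      // each (freq, norm) pair bounds the score of any doc in [target, blockUpTo]
    }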
lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java (new file)

@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Impacts;
+import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.ImpactsSource;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+
+/**
+ * {@link DocIdSetIterator} that skips non-competitive docs thanks to the
+ * indexed impacts. Call {@link #setMinCompetitiveScore(float)} in order to
+ * give this iterator the ability to skip low-scoring documents.
+ * @lucene.internal
+ */
+public final class ImpactsDISI extends DocIdSetIterator {
+
+  private final DocIdSetIterator in;
+  private final ImpactsSource impactsSource;
+  private final MaxScoreCache maxScoreCache;
+  private final float globalMaxScore;
+  private float minCompetitiveScore = 0;
+  private int upTo = DocIdSetIterator.NO_MORE_DOCS;
+  private float maxScore = Float.MAX_VALUE;
+
+  /**
+   * Sole constructor.
+   * @param in wrapped iterator
+   * @param impactsSource source of impacts
+   * @param scorer scorer
+   */
+  public ImpactsDISI(DocIdSetIterator in, ImpactsSource impactsSource, SimScorer scorer) {
+    this.in = in;
+    this.impactsSource = impactsSource;
+    this.maxScoreCache = new MaxScoreCache(impactsSource, scorer);
+    this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1L);
+  }
+
+  /**
+   * Set the minimum competitive score.
+   * @see Scorer#setMinCompetitiveScore(float)
+   */
+  public void setMinCompetitiveScore(float minCompetitiveScore) {
+    assert minCompetitiveScore >= this.minCompetitiveScore;
+    if (minCompetitiveScore > this.minCompetitiveScore) {
+      this.minCompetitiveScore = minCompetitiveScore;
+      // force upTo and maxScore to be recomputed so that we will skip documents
+      // if the current block of documents is not competitive - only if the min
+      // competitive score actually increased
+      upTo = -1;
+    }
+  }
+
+  /**
+   * Implement the contract of {@link Scorer#advanceShallow(int)} based on the
+   * wrapped {@link ImpactsEnum}.
+   * @see Scorer#advanceShallow(int)
+   */
+  public int advanceShallow(int target) throws IOException {
+    impactsSource.advanceShallow(target);
+    Impacts impacts = impactsSource.getImpacts();
+    return impacts.getDocIdUpTo(0);
+  }
+
+  /**
+   * Implement the contract of {@link Scorer#getMaxScore(int)} based on the
+   * wrapped {@link ImpactsEnum} and {@link Scorer}.
+   * @see Scorer#getMaxScore(int)
+   */
+  public float getMaxScore(int upTo) throws IOException {
+    final int level = maxScoreCache.getLevel(upTo);
+    if (level == -1) {
+      return globalMaxScore;
+    } else {
+      return maxScoreCache.getMaxScoreForLevel(level);
+    }
+  }
+
+  private int advanceTarget(int target) throws IOException {
+    if (target <= upTo) {
+      // we are still in the current block, which is considered competitive
+      // according to impacts, no skipping
+      return target;
+    }
+
+    upTo = advanceShallow(target);
+    maxScore = maxScoreCache.getMaxScoreForLevel(0);
+
+    while (true) {
+      assert upTo >= target;
+
+      if (maxScore >= minCompetitiveScore) {
+        return target;
+      }
+
+      if (upTo == NO_MORE_DOCS) {
+        return NO_MORE_DOCS;
+      }
+
+      final int skipUpTo = maxScoreCache.getSkipUpTo(minCompetitiveScore);
+      if (skipUpTo == -1) { // no further skipping
+        target = upTo + 1;
+      } else if (skipUpTo == NO_MORE_DOCS) {
+        return NO_MORE_DOCS;
+      } else {
+        target = skipUpTo + 1;
+      }
+      upTo = advanceShallow(target);
+      maxScore = maxScoreCache.getMaxScoreForLevel(0);
+    }
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    return in.advance(advanceTarget(target));
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    return advance(in.docID() + 1);
+  }
+
+  @Override
+  public int docID() {
+    return in.docID();
+  }
+
+  @Override
+  public long cost() {
+    return in.cost();
+  }
+
+}
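advanceTarget(int) above is the heart of the skipping: whenever the most precise level's maximum
score falls below the minimum competitive score, it asks MaxScoreCache how far all documents are
known to be non-competitive (getSkipUpTo) and shallow-advances past that point, so real postings are
only decoded once a potentially competitive block is found. A hedged usage sketch (the wrapped
postings object plays both the iterator and the impacts-source role, as in TermScorer below):

    ImpactsEnum postings = termsEnum.impacts(PostingsEnum.FREQS);
    ImpactsDISI disi = new ImpactsDISI(postings, postings, simScorer);
    disi.setMinCompetitiveScore(5.0f); // normally driven by the collector via the Scorer
    for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
      // only docs from blocks whose score upper bound is >= 5.0 are returned here
    }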
lucene/core/src/java/org/apache/lucene/search/MaxScoreCache.java

@@ -22,7 +22,7 @@ import java.util.List;
 
 import org.apache.lucene.index.Impact;
 import org.apache.lucene.index.Impacts;
-import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.ImpactsSource;
 import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.ArrayUtil;
 

@@ -30,22 +30,21 @@ import org.apache.lucene.util.ArrayUtil;
  * Compute maximum scores based on {@link Impacts} and keep them in a cache in
  * order not to run expensive similarity score computations multiple times on
  * the same data.
- * @lucene.internal
  */
-public final class MaxScoreCache {
+final class MaxScoreCache {
 
-  private final ImpactsEnum impactsEnum;
+  private final ImpactsSource impactsSource;
   private final SimScorer scorer;
   private final float globalMaxScore;
   private float[] maxScoreCache;
   private int[] maxScoreCacheUpTo;
 
   /**
    * Sole constructor.
    */
-  public MaxScoreCache(ImpactsEnum impactsEnum, SimScorer scorer) {
-    this.impactsEnum = impactsEnum;
+  public MaxScoreCache(ImpactsSource impactsSource, SimScorer scorer) {
+    this.impactsSource = impactsSource;
     this.scorer = scorer;
     globalMaxScore = scorer.score(Integer.MAX_VALUE, 1L);
     maxScoreCache = new float[0];
     maxScoreCacheUpTo = new int[0];
   }
 
@@ -71,8 +70,8 @@ public final class MaxScoreCache {
   * Return the first level that includes all doc IDs up to {@code upTo},
   * or -1 if there is no such level.
   */
-  private int getLevel(int upTo) throws IOException {
-    final Impacts impacts = impactsEnum.getImpacts();
+  int getLevel(int upTo) throws IOException {
+    final Impacts impacts = impactsSource.getImpacts();
     for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
       final int impactsUpTo = impacts.getDocIdUpTo(level);
       if (upTo <= impactsUpTo) {

@@ -86,7 +85,7 @@ public final class MaxScoreCache {
   * Return the maximum score for the given {@code level}.
   */
  float getMaxScoreForLevel(int level) throws IOException {
-    final Impacts impacts = impactsEnum.getImpacts();
+    final Impacts impacts = impactsSource.getImpacts();
     ensureCacheSize(level + 1);
     final int levelUpTo = impacts.getDocIdUpTo(level);
     if (maxScoreCacheUpTo[level] < levelUpTo) {

@@ -100,8 +99,7 @@ public final class MaxScoreCache {
   * Return the maximum level at which scores are all less than {@code minScore},
   * or -1 if none.
   */
-  int getSkipLevel(float minScore) throws IOException {
-    final Impacts impacts = impactsEnum.getImpacts();
+  private int getSkipLevel(Impacts impacts, float minScore) throws IOException {
     final int numLevels = impacts.numLevels();
     for (int level = 0; level < numLevels; ++level) {
       if (getMaxScoreForLevel(level) >= minScore) {

@@ -112,27 +110,17 @@ public final class MaxScoreCache {
   }
 
   /**
-   * Implement the contract of {@link Scorer#advanceShallow(int)} based on the
-   * wrapped {@link ImpactsEnum}.
-   * @see Scorer#advanceShallow(int)
+   * Return an inclusive upper bound of documents that all have a score that
+   * is less than {@code minScore}, or {@code -1} if the current document may
+   * be competitive.
   */
-  public int advanceShallow(int target) throws IOException {
-    impactsEnum.advanceShallow(target);
-    Impacts impacts = impactsEnum.getImpacts();
-    return impacts.getDocIdUpTo(0);
+  int getSkipUpTo(float minScore) throws IOException {
+    final Impacts impacts = impactsSource.getImpacts();
+    final int level = getSkipLevel(impacts, minScore);
+    if (level == -1) {
+      return -1;
+    }
+    return impacts.getDocIdUpTo(level);
   }
 
-  /**
-   * Implement the contract of {@link Scorer#getMaxScore(int)} based on the
-   * wrapped {@link ImpactsEnum} and {@link Scorer}.
-   * @see Scorer#getMaxScore(int)
-   */
-  public float getMaxScore(int upTo) throws IOException {
-    final int level = getLevel(upTo);
-    if (level == -1) {
-      return globalMaxScore;
-    } else {
-      return getMaxScoreForLevel(level);
-    }
-  }
 }
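MaxScoreCache turns impacts into score bounds by running the similarity over each (freq, norm) pair
of a level and keeping the maximum; the cache matters because the same levels are re-read every time
the iterator re-evaluates whether it can skip. A sketch of the uncached computation that
getMaxScoreForLevel amortizes (illustrative, not the class's actual code):

    float maxScoreForLevel(Impacts impacts, SimScorer scorer, int level) {
      float max = 0;
      for (Impact impact : impacts.getImpacts(level)) {
        // scores grow with freq and shrink with norm, so taking the max over all
        // recorded pairs yields a valid upper bound for the level's doc range
        max = Math.max(max, scorer.score(impact.freq, impact.norm));
      }
      return max;
    }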
lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java

@@ -21,12 +21,19 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Objects;
 import java.util.Set;
 
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.index.Impacts;
+import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.ImpactsSource;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermStates;

@@ -34,6 +41,7 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
 
 /**
  * A query that treats multiple terms as synonyms.

@@ -112,7 +120,7 @@ public final class SynonymQuery extends Query {
   @Override
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
     if (scoreMode.needsScores()) {
-      return new SynonymWeight(this, searcher, boost);
+      return new SynonymWeight(this, searcher, scoreMode, boost);
     } else {
       // if scores are not needed, let BooleanWeight deal with optimizing that case.
       BooleanQuery.Builder bq = new BooleanQuery.Builder();

@@ -127,9 +135,12 @@ public final class SynonymQuery extends Query {
     private final TermStates termStates[];
     private final Similarity similarity;
     private final Similarity.SimScorer simWeight;
+    private final ScoreMode scoreMode;
 
-    SynonymWeight(Query query, IndexSearcher searcher, float boost) throws IOException {
+    SynonymWeight(Query query, IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
       super(query);
+      assert scoreMode.needsScores();
+      this.scoreMode = scoreMode;
       CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field());
       long docFreq = 0;
       long totalTermFreq = 0;

@@ -176,8 +187,7 @@ public final class SynonymQuery extends Query {
         if (newDoc == doc) {
           final float freq;
           if (scorer instanceof SynonymScorer) {
-            SynonymScorer synScorer = (SynonymScorer) scorer;
-            freq = synScorer.tf(synScorer.getSubMatches());
+            freq = ((SynonymScorer) scorer).freq();
           } else {
             assert scorer instanceof TermScorer;
             freq = ((TermScorer)scorer).freq();

@@ -197,61 +207,277 @@ public final class SynonymQuery extends Query {
 
     @Override
     public Scorer scorer(LeafReaderContext context) throws IOException {
-      // we use termscorers + disjunction as an impl detail
-      List<Scorer> subScorers = new ArrayList<>();
+      List<PostingsEnum> iterators = new ArrayList<>();
+      List<ImpactsEnum> impacts = new ArrayList<>();
       for (int i = 0; i < terms.length; i++) {
         TermState state = termStates[i].get(context);
         if (state != null) {
           TermsEnum termsEnum = context.reader().terms(terms[i].field()).iterator();
           termsEnum.seekExact(terms[i].bytes(), state);
-          LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
-          subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer));
+          if (scoreMode == ScoreMode.TOP_SCORES) {
+            ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
+            iterators.add(impactsEnum);
+            impacts.add(impactsEnum);
+          } else {
+            PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS);
+            iterators.add(postingsEnum);
+            impacts.add(new SlowImpactsEnum(postingsEnum));
+          }
         }
       }
-      if (subScorers.isEmpty()) {
+
+      if (iterators.isEmpty()) {
         return null;
-      } else if (subScorers.size() == 1) {
-        // we must optimize this case (term not in segment), disjunctionscorer requires >= 2 subs
-        return subScorers.get(0);
-      } else {
-        LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
-        return new SynonymScorer(simScorer, this, subScorers);
       }
+
+      LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
+
+      // we must optimize this case (term not in segment), disjunctions require >= 2 subs
+      if (iterators.size() == 1) {
+        if (scoreMode == ScoreMode.TOP_SCORES) {
+          return new TermScorer(this, impacts.get(0), simScorer);
+        } else {
+          return new TermScorer(this, iterators.get(0), simScorer);
+        }
+      }
+
+      // we use termscorers + disjunction as an impl detail
+      DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
+      for (PostingsEnum postings : iterators) {
+        queue.add(new DisiWrapper(new TermScorer(this, postings, simScorer)));
+      }
+      // Even though it is called approximation, it is accurate since none of
+      // the sub iterators are two-phase iterators.
+      DocIdSetIterator iterator = new DisjunctionDISIApproximation(queue);
+
+      ImpactsSource impactsSource = mergeImpacts(impacts.toArray(new ImpactsEnum[0]));
+      ImpactsDISI impactsDisi = new ImpactsDISI(iterator, impactsSource, simScorer.getSimScorer());
+
+      if (scoreMode == ScoreMode.TOP_SCORES) {
+        iterator = impactsDisi;
+      }
+
+      return new SynonymScorer(this, queue, iterator, impactsDisi, simScorer);
     }
 
     @Override
     public boolean isCacheable(LeafReaderContext ctx) {
       return true;
     }
 
   }
 
-  static class SynonymScorer extends DisjunctionScorer {
-    private final LeafSimScorer similarity;
-
-    SynonymScorer(LeafSimScorer similarity, Weight weight, List<Scorer> subScorers) {
-      super(weight, subScorers, true);
-      this.similarity = similarity;
+  /**
+   * Merge impacts for multiple synonyms.
+   */
+  static ImpactsSource mergeImpacts(ImpactsEnum[] impactsEnums) {
+    return new ImpactsSource() {
+
+      class SubIterator {
+        final Iterator<Impact> iterator;
+        int previousFreq;
+        Impact current;
+
+        SubIterator(Iterator<Impact> iterator) {
+          this.iterator = iterator;
+          this.current = iterator.next();
+        }
+
+        void next() {
+          previousFreq = current.freq;
+          if (iterator.hasNext() == false) {
+            current = null;
+          } else {
+            current = iterator.next();
+          }
+        }
+
+      }
+
+      @Override
+      public Impacts getImpacts() throws IOException {
+        final Impacts[] impacts = new Impacts[impactsEnums.length];
+        // Use the impacts that have the lower next boundary as a lead.
+        // It will decide on the number of levels and the block boundaries.
+        Impacts tmpLead = null;
+        for (int i = 0; i < impactsEnums.length; ++i) {
+          impacts[i] = impactsEnums[i].getImpacts();
+          if (tmpLead == null || impacts[i].getDocIdUpTo(0) < tmpLead.getDocIdUpTo(0)) {
+            tmpLead = impacts[i];
+          }
+        }
+        final Impacts lead = tmpLead;
+        return new Impacts() {
+
+          @Override
+          public int numLevels() {
+            // Delegate to the lead
+            return lead.numLevels();
+          }
+
+          @Override
+          public int getDocIdUpTo(int level) {
+            // Delegate to the lead
+            return lead.getDocIdUpTo(level);
+          }
+
+          /**
+           * Return the minimum level whose impacts are valid up to {@code docIdUpTo},
+           * or {@code -1} if there is no such level.
+           */
+          private int getLevel(Impacts impacts, int docIdUpTo) {
+            for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
+              if (impacts.getDocIdUpTo(level) >= docIdUpTo) {
+                return level;
+              }
+            }
+            return -1;
+          }
+
+          @Override
+          public List<Impact> getImpacts(int level) {
+            final int docIdUpTo = getDocIdUpTo(level);
+
+            List<List<Impact>> toMerge = new ArrayList<>();
+
+            for (int i = 0; i < impactsEnums.length; ++i) {
+              if (impactsEnums[i].docID() <= docIdUpTo) {
+                int impactsLevel = getLevel(impacts[i], docIdUpTo);
+                if (impactsLevel == -1) {
+                  // One instance doesn't have impacts that cover up to docIdUpTo
+                  // Return impacts that trigger the maximum score
+                  return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
+                }
+                toMerge.add(impacts[i].getImpacts(impactsLevel));
+              }
+            }
+            assert toMerge.size() > 0; // otherwise it would mean the docID is > docIdUpTo, which is wrong
+
+            if (toMerge.size() == 1) {
+              // common if one synonym is common and the other one is rare
+              return toMerge.get(0);
+            }
+
+            PriorityQueue<SubIterator> pq = new PriorityQueue<SubIterator>(impacts.length) {
+              @Override
+              protected boolean lessThan(SubIterator a, SubIterator b) {
+                if (a.current == null) { // means iteration is finished
+                  return false;
+                }
+                if (b.current == null) {
+                  return true;
+                }
+                return Long.compareUnsigned(a.current.norm, b.current.norm) < 0;
+              }
+            };
+            for (List<Impact> impacts : toMerge) {
+              pq.add(new SubIterator(impacts.iterator()));
+            }
+
+            List<Impact> mergedImpacts = new ArrayList<>();
+
+            // Idea: merge impacts by norm. The tricky thing is that we need to
+            // consider norm values that are not in the impacts too. For
+            // instance if the list of impacts is [{freq=2,norm=10}, {freq=4,norm=12}],
+            // there might well be a document that has a freq of 2 and a length of 11,
+            // which was just not added to the list of impacts because {freq=2,norm=10}
+            // is more competitive. So the way it works is that we track the sum of
+            // the term freqs that we have seen so far in order to account for these
+            // implicit impacts.
+
+            long sumTf = 0;
+            SubIterator top = pq.top();
+            do {
+              final long norm = top.current.norm;
+              do {
+                sumTf += top.current.freq - top.previousFreq;
+                top.next();
+                top = pq.updateTop();
+              } while (top.current != null && top.current.norm == norm);
+
+              final int freqUpperBound = (int) Math.min(Integer.MAX_VALUE, sumTf);
+              if (mergedImpacts.isEmpty()) {
+                mergedImpacts.add(new Impact(freqUpperBound, norm));
+              } else {
+                Impact prevImpact = mergedImpacts.get(mergedImpacts.size() - 1);
+                assert Long.compareUnsigned(prevImpact.norm, norm) < 0;
+                if (freqUpperBound > prevImpact.freq) {
+                  mergedImpacts.add(new Impact(freqUpperBound, norm));
+                } // otherwise the previous impact is already more competitive
+              }
+            } while (top.current != null);
+
+            return mergedImpacts;
+          }
+        };
+      }
+
+      @Override
+      public void advanceShallow(int target) throws IOException {
+        for (ImpactsEnum impactsEnum : impactsEnums) {
+          if (impactsEnum.docID() < target) {
+            impactsEnum.advanceShallow(target);
+          }
+        }
+      }
+    };
+  }
+
+  private static class SynonymScorer extends Scorer {
+
+    private final DisiPriorityQueue queue;
+    private final DocIdSetIterator iterator;
+    private final ImpactsDISI impactsDisi;
+    private final LeafSimScorer simScorer;
+
+    SynonymScorer(Weight weight, DisiPriorityQueue queue, DocIdSetIterator iterator,
+        ImpactsDISI impactsDisi, LeafSimScorer simScorer) {
+      super(weight);
+      this.queue = queue;
+      this.iterator = iterator;
+      this.impactsDisi = impactsDisi;
+      this.simScorer = simScorer;
     }
 
     @Override
-    protected float score(DisiWrapper topList) throws IOException {
-      return similarity.score(topList.doc, tf(topList));
+    public int docID() {
+      return iterator.docID();
+    }
+
+    int freq() throws IOException {
+      DisiWrapper w = queue.topList();
+      int freq = ((PostingsEnum) w.iterator).freq();
+      for (w = w.next; w != null; w = w.next) {
+        freq += ((PostingsEnum) w.iterator).freq();
+        if (freq < 0) { // overflow
+          return Integer.MAX_VALUE;
+        }
+      }
+      return freq;
+    }
+
+    @Override
+    public float score() throws IOException {
+      return simScorer.score(iterator.docID(), freq());
+    }
+
+    @Override
+    public DocIdSetIterator iterator() {
+      return iterator;
     }
 
     @Override
     public float getMaxScore(int upTo) throws IOException {
-      // TODO: merge impacts to get better score upper bounds
-      return similarity.getSimScorer().score(Float.MAX_VALUE, 1L);
+      return impactsDisi.getMaxScore(upTo);
     }
 
-    /** combines TF of all subs. */
-    final int tf(DisiWrapper topList) throws IOException {
-      int tf = 0;
-      for (DisiWrapper w = topList; w != null; w = w.next) {
-        tf += ((TermScorer)w.scorer).freq();
-      }
-      return tf;
+    @Override
+    public int advanceShallow(int target) throws IOException {
+      return impactsDisi.advanceShallow(target);
+    }
+
+    @Override
+    public void setMinCompetitiveScore(float minScore) {
+      impactsDisi.setMinCompetitiveScore(minScore);
    }
  }
}
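The merge-by-norm idea in mergeImpacts is easiest to see on numbers. Take the level-0 impacts that
testMergeImpacts (below) feeds in for two synonyms, [{freq=3,norm=10}, {freq=5,norm=12},
{freq=8,norm=13}] and [{freq=2,norm=10}, {freq=6,norm=13}]: walking norms in increasing order and
accumulating each sub-iterator's freq deltas gives exactly the merged upper bounds the test expects:

    norm <= 10: sumTf = 3 + 2                 = 5   ->  Impact(5, 10)
    norm <= 12: sumTf = 5 + (5 - 3)           = 7   ->  Impact(7, 12)
    norm <= 13: sumTf = 7 + (8 - 5) + (6 - 2) = 14  ->  Impact(14, 13)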
lucene/core/src/java/org/apache/lucene/search/TermQuery.java

@@ -111,7 +111,11 @@ public class TermQuery extends Query {
         return null;
       }
       LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores());
-      return new TermScorer(this, termsEnum, scoreMode, scorer);
+      if (scoreMode == ScoreMode.TOP_SCORES) {
+        return new TermScorer(this, termsEnum.impacts(PostingsEnum.FREQS), scorer);
+      } else {
+        return new TermScorer(this, termsEnum.postings(null, PostingsEnum.FREQS), scorer);
+      }
     }
 
     @Override
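Both TermQuery and SynonymQuery now commit to a postings representation up front: impacts when the
collector only needs the top scores, plain postings otherwise. A sketch of the decision (assuming a
positioned termsEnum; impacts are slightly more expensive to read, so they are only requested when
the search can actually exploit them):

    PostingsEnum pe = (scoreMode == ScoreMode.TOP_SCORES)
        ? termsEnum.impacts(PostingsEnum.FREQS)          // postings plus per-block score metadata
        : termsEnum.postings(null, PostingsEnum.FREQS);  // plain postings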
lucene/core/src/java/org/apache/lucene/search/TermScorer.java

@@ -19,11 +19,9 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.Impacts;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowImpactsEnum;
-import org.apache.lucene.index.TermsEnum;
 
 /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
  */

@@ -32,101 +30,29 @@ final class TermScorer extends Scorer {
   private final ImpactsEnum impactsEnum;
   private final DocIdSetIterator iterator;
   private final LeafSimScorer docScorer;
-  private final MaxScoreCache maxScoreCache;
-  private float minCompetitiveScore;
+  private final ImpactsDISI impactsDisi;
 
   /**
-   * Construct a <code>TermScorer</code>.
-   *
-   * @param weight
-   *          The weight of the <code>Term</code> in the query.
-   * @param te
-   *          A {@link TermsEnum} positioned on the expected term.
-   * @param docScorer
-   *          A {@link LeafSimScorer} for the appropriate field.
+   * Construct a {@link TermScorer} that will iterate all documents.
   */
-  TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException {
+  TermScorer(Weight weight, PostingsEnum postingsEnum, LeafSimScorer docScorer) {
     super(weight);
+    iterator = this.postingsEnum = postingsEnum;
+    impactsEnum = new SlowImpactsEnum(postingsEnum);
+    impactsDisi = new ImpactsDISI(impactsEnum, impactsEnum, docScorer.getSimScorer());
     this.docScorer = docScorer;
-    if (scoreMode == ScoreMode.TOP_SCORES) {
-      impactsEnum = te.impacts(PostingsEnum.FREQS);
-      maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
-      postingsEnum = impactsEnum;
-      iterator = new DocIdSetIterator() {
-
-        int upTo = -1;
-        float maxScore;
-
-        private int advanceTarget(int target) throws IOException {
-          if (minCompetitiveScore == 0) {
-            // no potential for skipping
-            return target;
-          }
-
-          if (target > upTo) {
-            impactsEnum.advanceShallow(target);
-            Impacts impacts = impactsEnum.getImpacts();
-            upTo = impacts.getDocIdUpTo(0);
-            maxScore = maxScoreCache.getMaxScoreForLevel(0);
-          }
-
-          while (true) {
-            assert upTo >= target;
-
-            if (maxScore >= minCompetitiveScore) {
-              return target;
-            }
-
-            if (upTo == NO_MORE_DOCS) {
-              return NO_MORE_DOCS;
-            }
-
-            impactsEnum.advanceShallow(upTo + 1);
-            Impacts impacts = impactsEnum.getImpacts();
-            final int level = maxScoreCache.getSkipLevel(minCompetitiveScore);
-            if (level >= 0) {
-              // we can skip more docs
-              int newUpTo = impacts.getDocIdUpTo(level);
-              if (newUpTo == NO_MORE_DOCS) {
-                return NO_MORE_DOCS;
-              }
-              target = newUpTo + 1;
-              impactsEnum.advanceShallow(target);
-              impacts = impactsEnum.getImpacts();
-            } else {
-              target = upTo + 1;
-            }
-            upTo = impacts.getDocIdUpTo(0);
-            maxScore = maxScoreCache.getMaxScoreForLevel(0);
-          }
-        }
-
-        @Override
-        public int advance(int target) throws IOException {
-          return impactsEnum.advance(advanceTarget(target));
-        }
-
-        @Override
-        public int nextDoc() throws IOException {
-          return advance(impactsEnum.docID() + 1);
-        }
-
-        @Override
-        public int docID() {
-          return impactsEnum.docID();
-        }
-
-        @Override
-        public long cost() {
-          return impactsEnum.cost();
-        }
-      };
-    } else {
-      postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE);
-      impactsEnum = new SlowImpactsEnum(postingsEnum);
-      maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
-      iterator = postingsEnum;
-    }
+  }
+
+  /**
+   * Construct a {@link TermScorer} that will use impacts to skip blocks of
+   * non-competitive documents.
+   */
+  TermScorer(Weight weight, ImpactsEnum impactsEnum, LeafSimScorer docScorer) {
+    super(weight);
+    postingsEnum = this.impactsEnum = impactsEnum;
+    impactsDisi = new ImpactsDISI(impactsEnum, impactsEnum, docScorer.getSimScorer());
+    iterator = impactsDisi;
+    this.docScorer = docScorer;
   }
 
   @Override

@@ -151,17 +77,17 @@ final class TermScorer extends Scorer {
 
   @Override
   public int advanceShallow(int target) throws IOException {
-    return maxScoreCache.advanceShallow(target);
+    return impactsDisi.advanceShallow(target);
   }
 
   @Override
   public float getMaxScore(int upTo) throws IOException {
-    return maxScoreCache.getMaxScore(upTo);
+    return impactsDisi.getMaxScore(upTo);
   }
 
   @Override
   public void setMinCompetitiveScore(float minScore) {
-    this.minCompetitiveScore = minScore;
+    impactsDisi.setMinCompetitiveScore(minScore);
   }
 
   /** Returns a string representation of this <code>TermScorer</code>. */
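SlowImpactsEnum is what keeps the non-optimized constructor uniform: it adapts any PostingsEnum to
the ImpactsEnum interface by reporting impacts that never allow skipping, so the same ImpactsDISI
code path works for both constructors. A conceptual sketch of its contract (the real class lives in
org.apache.lucene.index; this describes the idea, not its code):

    // One level covering every remaining document with the most pessimistic bound:
    //   numLevels()     -> 1
    //   getDocIdUpTo(0) -> DocIdSetIterator.NO_MORE_DOCS
    //   getImpacts(0)   -> [new Impact(Integer.MAX_VALUE, 1L)]  // max freq, min norm
    // ImpactsDISI.advanceTarget() therefore never finds a block it can skip.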
lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java

@@ -18,27 +18,39 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.index.Impacts;
+import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.ImpactsSource;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
 
 public class TestSynonymQuery extends LuceneTestCase {
 
   public void testEquals() {
     QueryUtils.checkEqual(new SynonymQuery(), new SynonymQuery());
-    QueryUtils.checkEqual(new SynonymQuery(new Term("foo", "bar")), 
+    QueryUtils.checkEqual(new SynonymQuery(new Term("foo", "bar")),
         new SynonymQuery(new Term("foo", "bar")));
-    QueryUtils.checkEqual(new SynonymQuery(new Term("a", "a"), new Term("a", "b")), 
+    QueryUtils.checkEqual(new SynonymQuery(new Term("a", "a"), new Term("a", "b")),
         new SynonymQuery(new Term("a", "b"), new Term("a", "a")));
   }
 
   public void testBogusParams() {
     expectThrows(IllegalArgumentException.class, () -> {
       new SynonymQuery(new Term("field1", "a"), new Term("field2", "b"));

@@ -54,6 +66,11 @@ public class TestSynonymQuery extends LuceneTestCase {
   }
 
   public void testScores() throws IOException {
+    doTestScores(false);
+    doTestScores(true);
+  }
+
+  private void doTestScores(boolean trackTotalHits) throws IOException {
     Directory dir = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
 

@@ -71,8 +88,12 @@ public class TestSynonymQuery extends LuceneTestCase {
     IndexSearcher searcher = newSearcher(reader);
     SynonymQuery query = new SynonymQuery(new Term("f", "a"), new Term("f", "b"));
 
-    TopDocs topDocs = searcher.search(query, 20);
-    assertEquals(11, topDocs.totalHits);
+    TopScoreDocCollector collector = TopScoreDocCollector.create(20, null, trackTotalHits);
+    searcher.search(query, collector);
+    TopDocs topDocs = collector.topDocs();
+    if (trackTotalHits) {
+      assertEquals(11, topDocs.totalHits);
+    }
     // All docs must have the same score
     for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
       assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);

@@ -83,4 +104,206 @@ public class TestSynonymQuery extends LuceneTestCase {
     dir.close();
   }
 
+  public void testMergeImpacts() throws IOException {
+    DummyImpactsEnum impacts1 = new DummyImpactsEnum();
+    impacts1.reset(42,
+        new Impact[][] {
+          new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) },
+          new Impact[] { new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) }
+        },
+        new int[] {
+          110,
+          945
+        });
+    DummyImpactsEnum impacts2 = new DummyImpactsEnum();
+    impacts2.reset(45,
+        new Impact[][] {
+          new Impact[] { new Impact(2, 10), new Impact(6, 13) },
+          new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
+        },
+        new int[] {
+          90,
+          1000
+        });
+
+    ImpactsSource mergedImpacts = SynonymQuery.mergeImpacts(new ImpactsEnum[] { impacts1, impacts2 });
+    assertEquals(
+        new Impact[][] {
+          new Impact[] { new Impact(5, 10), new Impact(7, 12), new Impact(14, 13) },
+          new Impact[] { new Impact(Integer.MAX_VALUE, 1) }
+        },
+        new int[] {
+          90,
+          1000
+        },
+        mergedImpacts.getImpacts());
+
+    // docID is > the first docIdUpTo of impacts1
+    impacts2.reset(112,
+        new Impact[][] {
+          new Impact[] { new Impact(2, 10), new Impact(6, 13) },
+          new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
+        },
+        new int[] {
+          150,
+          1000
+        });
+    assertEquals(
+        new Impact[][] {
+          new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) }, // same as impacts1
+          new Impact[] { new Impact(3, 9), new Impact(10, 11), new Impact(15, 13), new Impact(19, 14) }
+        },
+        new int[] {
+          110,
+          945
+        },
+        mergedImpacts.getImpacts());
+  }
+
+  private static void assertEquals(Impact[][] impacts, int[] docIdUpTo, Impacts actual) {
+    assertEquals(impacts.length, actual.numLevels());
+    for (int i = 0; i < impacts.length; ++i) {
+      assertEquals(docIdUpTo[i], actual.getDocIdUpTo(i));
+      assertEquals(Arrays.asList(impacts[i]), actual.getImpacts(i));
+    }
+  }
+
+  private static class DummyImpactsEnum extends ImpactsEnum {
+
+    private int docID;
+    private Impact[][] impacts;
+    private int[] docIdUpTo;
+
+    void reset(int docID, Impact[][] impacts, int[] docIdUpTo) {
+      this.docID = docID;
+      this.impacts = impacts;
+      this.docIdUpTo = docIdUpTo;
+    }
+
+    @Override
+    public void advanceShallow(int target) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Impacts getImpacts() throws IOException {
+      return new Impacts() {
+
+        @Override
+        public int numLevels() {
+          return impacts.length;
+        }
+
+        @Override
+        public int getDocIdUpTo(int level) {
+          return docIdUpTo[level];
+        }
+
+        @Override
+        public List<Impact> getImpacts(int level) {
+          return Arrays.asList(impacts[level]);
+        }
+
+      };
+    }
+
+    @Override
+    public int freq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int nextPosition() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public BytesRef getPayload() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long cost() {
+      throw new UnsupportedOperationException();
+    }
+
+  }
+
+  public void testRandomTopDocs() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    int numDocs = atLeast(128 * 8 * 8 * 3); // make sure some terms have skip data
+    for (int i = 0; i < numDocs; ++i) {
+      Document doc = new Document();
+      int numValues = random().nextInt(1 << random().nextInt(5));
+      int start = random().nextInt(10);
+      for (int j = 0; j < numValues; ++j) {
+        int freq = TestUtil.nextInt(random(), 1, 1 << random().nextInt(3));
+        for (int k = 0; k < freq; ++k) {
+          doc.add(new TextField("foo", Integer.toString(start + j), Store.NO));
+        }
+      }
+      w.addDocument(doc);
+    }
+    IndexReader reader = DirectoryReader.open(w);
+    w.close();
+    IndexSearcher searcher = newSearcher(reader);
+
+    for (int term1 = 0; term1 < 15; ++term1) {
+      int term2;
+      do {
+        term2 = random().nextInt(15);
+      } while (term1 == term2);
+      Query query = new SynonymQuery(
+          new Term[] {
+            new Term("foo", Integer.toString(term1)),
+            new Term("foo", Integer.toString(term2))});
+
+      TopScoreDocCollector collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE
+      TopScoreDocCollector collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
+
+      searcher.search(query, collector1);
+      searcher.search(query, collector2);
+      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
+
+      int filterTerm = random().nextInt(15);
+      Query filteredQuery = new BooleanQuery.Builder()
+          .add(query, Occur.MUST)
+          .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
+          .build();
+
+      collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE
+      collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
+      searcher.search(filteredQuery, collector1);
+      searcher.search(filteredQuery, collector2);
+      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
+    }
+    reader.close();
+    dir.close();
+  }
 }
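The second expectation in testMergeImpacts above follows from the same freq-delta arithmetic, this
time on level 1: impacts2 now starts at doc 112 > 110, so level 0 falls back to impacts1 alone,
while level 1 merges [{5,11},{8,13},{12,14}] with [{3,9},{5,11},{7,13}]:

    norm <= 9 : sumTf = 3                       ->  Impact(3, 9)
    norm <= 11: sumTf = 3 + 5 + (5 - 3)         ->  Impact(10, 11)
    norm <= 13: sumTf = 10 + (8 - 5) + (7 - 5)  ->  Impact(15, 13)
    norm <= 14: sumTf = 15 + (12 - 8)           ->  Impact(19, 14)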
lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java

@@ -24,7 +24,7 @@ import java.util.List;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FilterLeafReader;

@@ -39,6 +39,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
 
 public class TestTermScorer extends LuceneTestCase {
   protected Directory directory;

@@ -218,7 +219,10 @@ public class TestTermScorer extends LuceneTestCase {
       int numValues = random().nextInt(1 << random().nextInt(5));
       int start = random().nextInt(10);
       for (int j = 0; j < numValues; ++j) {
-        doc.add(new StringField("foo", Integer.toString(start + j), Store.NO));
+        int freq = TestUtil.nextInt(random(), 1, 1 << random().nextInt(3));
+        for (int k = 0; k < freq; ++k) {
+          doc.add(new TextField("foo", Integer.toString(start + j), Store.NO));
+        }
       }
       w.addDocument(doc);
     }

@@ -234,7 +238,7 @@ public class TestTermScorer extends LuceneTestCase {
 
     searcher.search(query, collector1);
     searcher.search(query, collector2);
-    assertTopDocsEquals(collector1.topDocs(), collector2.topDocs());
+    CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
 
     int filterTerm = random().nextInt(15);
     Query filteredQuery = new BooleanQuery.Builder()

@@ -246,19 +250,10 @@ public class TestTermScorer extends LuceneTestCase {
       collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
       searcher.search(filteredQuery, collector1);
       searcher.search(filteredQuery, collector2);
-      assertTopDocsEquals(collector1.topDocs(), collector2.topDocs());
+      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
     }
     reader.close();
     dir.close();
   }
-
-  private static void assertTopDocsEquals(TopDocs td1, TopDocs td2) {
-    assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
-    for (int i = 0; i < td1.scoreDocs.length; ++i) {
-      ScoreDoc sd1 = td1.scoreDocs[i];
-      ScoreDoc sd2 = td2.scoreDocs[i];
-      assertEquals(sd1.doc, sd2.doc);
-      assertEquals(sd1.score, sd2.score, 0f);
-    }
-  }
 }