mirror of https://github.com/apache/lucene.git
LUCENE-8312: Leverage impacts to speed up SynonymQuery.
This commit is contained in:
parent
24d79de796
commit
0a1de2c4a5
|
@ -30,8 +30,8 @@ import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.ImpactsDISI;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.MaxScoreCache;
|
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.ScoreMode;
|
import org.apache.lucene.search.ScoreMode;
|
||||||
import org.apache.lucene.search.Scorer;
|
import org.apache.lucene.search.Scorer;
|
||||||
|
@ -133,9 +133,9 @@ final class FeatureQuery extends Query {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
SimScorer scorer = function.scorer(boost);
|
final SimScorer scorer = function.scorer(boost);
|
||||||
ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
|
final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
|
||||||
MaxScoreCache maxScoreCache = new MaxScoreCache(impacts, scorer);
|
final ImpactsDISI impactsDisi = new ImpactsDISI(impacts, impacts, scorer);
|
||||||
|
|
||||||
return new Scorer(this) {
|
return new Scorer(this) {
|
||||||
|
|
||||||
|
@ -151,19 +151,23 @@ final class FeatureQuery extends Query {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocIdSetIterator iterator() {
|
public DocIdSetIterator iterator() {
|
||||||
return impacts;
|
return impactsDisi;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int advanceShallow(int target) throws IOException {
|
public int advanceShallow(int target) throws IOException {
|
||||||
return maxScoreCache.advanceShallow(target);
|
return impactsDisi.advanceShallow(target);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getMaxScore(int upTo) throws IOException {
|
public float getMaxScore(int upTo) throws IOException {
|
||||||
return maxScoreCache.getMaxScore(upTo);
|
return impactsDisi.getMaxScore(upTo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setMinCompetitiveScore(float minScore) {
|
||||||
|
impactsDisi.setMinCompetitiveScore(minScore);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,41 +16,14 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extension of {@link PostingsEnum} which also provides information about
|
* Extension of {@link PostingsEnum} which also provides information about
|
||||||
* upcoming impacts.
|
* upcoming impacts.
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public abstract class ImpactsEnum extends PostingsEnum {
|
public abstract class ImpactsEnum extends PostingsEnum implements ImpactsSource {
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
protected ImpactsEnum() {}
|
protected ImpactsEnum() {}
|
||||||
|
|
||||||
/**
|
|
||||||
* Shallow-advance to {@code target}. This is cheaper than calling
|
|
||||||
* {@link #advance(int)} and allows further calls to {@link #getImpacts()}
|
|
||||||
* to ignore doc IDs that are less than {@code target} in order to get more
|
|
||||||
* precise information about impacts.
|
|
||||||
* This method may not be called on targets that are less than the current
|
|
||||||
* {@link #docID()}.
|
|
||||||
* After this method has been called, {@link #nextDoc()} may not be called
|
|
||||||
* if the current doc ID is less than {@code target - 1} and
|
|
||||||
* {@link #advance(int)} may not be called on targets that are less than
|
|
||||||
* {@code target}.
|
|
||||||
*/
|
|
||||||
public abstract void advanceShallow(int target) throws IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get information about upcoming impacts for doc ids that are greater than
|
|
||||||
* or equal to the maximum of {@link #docID()} and the last target that was
|
|
||||||
* passed to {@link #advanceShallow(int)}.
|
|
||||||
* This method may not be called on an unpositioned iterator on which
|
|
||||||
* {@link #advanceShallow(int)} has never been called.
|
|
||||||
* NOTE: advancing this iterator may invalidate the returned impacts, so they
|
|
||||||
* should not be used after the iterator has been advanced.
|
|
||||||
*/
|
|
||||||
public abstract Impacts getImpacts() throws IOException;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Source of {@link Impacts}.
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public interface ImpactsSource {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shallow-advance to {@code target}. This is cheaper than calling
|
||||||
|
* {@link DocIdSetIterator#advance(int)} and allows further calls to
|
||||||
|
* {@link #getImpacts()} to ignore doc IDs that are less than {@code target}
|
||||||
|
* in order to get more precise information about impacts.
|
||||||
|
* This method may not be called on targets that are less than the current
|
||||||
|
* {@link DocIdSetIterator#docID()}.
|
||||||
|
* After this method has been called, {@link DocIdSetIterator#nextDoc()} may
|
||||||
|
* not be called if the current doc ID is less than {@code target - 1} and
|
||||||
|
* {@link DocIdSetIterator#advance(int)} may not be called on targets that
|
||||||
|
* are less than {@code target}.
|
||||||
|
*/
|
||||||
|
void advanceShallow(int target) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get information about upcoming impacts for doc ids that are greater than
|
||||||
|
* or equal to the maximum of {@link DocIdSetIterator#docID()} and the last
|
||||||
|
* target that was passed to {@link #advanceShallow(int)}.
|
||||||
|
* This method may not be called on an unpositioned iterator on which
|
||||||
|
* {@link #advanceShallow(int)} has never been called.
|
||||||
|
* NOTE: advancing this iterator may invalidate the returned impacts, so they
|
||||||
|
* should not be used after the iterator has been advanced.
|
||||||
|
*/
|
||||||
|
Impacts getImpacts() throws IOException;
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,149 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Impacts;
|
||||||
|
import org.apache.lucene.index.ImpactsEnum;
|
||||||
|
import org.apache.lucene.index.ImpactsSource;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link DocIdSetIterator} that skips non-competitive docs thanks to the
|
||||||
|
* indexed impacts. Call {@link #setMinCompetitiveScore(float)} in order to
|
||||||
|
* give this iterator the ability to skip low-scoring documents.
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public final class ImpactsDISI extends DocIdSetIterator {
|
||||||
|
|
||||||
|
private final DocIdSetIterator in;
|
||||||
|
private final ImpactsSource impactsSource;
|
||||||
|
private final MaxScoreCache maxScoreCache;
|
||||||
|
private final float globalMaxScore;
|
||||||
|
private float minCompetitiveScore = 0;
|
||||||
|
private int upTo = DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
private float maxScore = Float.MAX_VALUE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sole constructor.
|
||||||
|
* @param in wrapped iterator
|
||||||
|
* @param impactsSource source of impacts
|
||||||
|
* @param scorer scorer
|
||||||
|
*/
|
||||||
|
public ImpactsDISI(DocIdSetIterator in, ImpactsSource impactsSource, SimScorer scorer) {
|
||||||
|
this.in = in;
|
||||||
|
this.impactsSource = impactsSource;
|
||||||
|
this.maxScoreCache = new MaxScoreCache(impactsSource, scorer);
|
||||||
|
this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1L);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the minimum competitive score.
|
||||||
|
* @see Scorer#setMinCompetitiveScore(float)
|
||||||
|
*/
|
||||||
|
public void setMinCompetitiveScore(float minCompetitiveScore) {
|
||||||
|
assert minCompetitiveScore >= this.minCompetitiveScore;
|
||||||
|
if (minCompetitiveScore > this.minCompetitiveScore) {
|
||||||
|
this.minCompetitiveScore = minCompetitiveScore;
|
||||||
|
// force upTo and maxScore to be recomputed so that we will skip documents
|
||||||
|
// if the current block of documents is not competitive - only if the min
|
||||||
|
// competitive score actually increased
|
||||||
|
upTo = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implement the contract of {@link Scorer#advanceShallow(int)} based on the
|
||||||
|
* wrapped {@link ImpactsSource}.
|
||||||
|
* @see Scorer#advanceShallow(int)
|
||||||
|
*/
|
||||||
|
public int advanceShallow(int target) throws IOException {
|
||||||
|
impactsSource.advanceShallow(target);
|
||||||
|
Impacts impacts = impactsSource.getImpacts();
|
||||||
|
return impacts.getDocIdUpTo(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implement the contract of {@link Scorer#getMaxScore(int)} based on the
|
||||||
|
* wrapped {@link ImpactsSource} and {@link SimScorer}.
|
||||||
|
* @see Scorer#getMaxScore(int)
|
||||||
|
*/
|
||||||
|
public float getMaxScore(int upTo) throws IOException {
|
||||||
|
final int level = maxScoreCache.getLevel(upTo);
|
||||||
|
if (level == -1) {
|
||||||
|
return globalMaxScore;
|
||||||
|
} else {
|
||||||
|
return maxScoreCache.getMaxScoreForLevel(level);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int advanceTarget(int target) throws IOException {
|
||||||
|
if (target <= upTo) {
|
||||||
|
// we are still in the current block, which is considered competitive
|
||||||
|
// according to impacts, no skipping
|
||||||
|
return target;
|
||||||
|
}
|
||||||
|
|
||||||
|
upTo = advanceShallow(target);
|
||||||
|
maxScore = maxScoreCache.getMaxScoreForLevel(0);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
assert upTo >= target;
|
||||||
|
|
||||||
|
if (maxScore >= minCompetitiveScore) {
|
||||||
|
return target;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (upTo == NO_MORE_DOCS) {
|
||||||
|
return NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int skipUpTo = maxScoreCache.getSkipUpTo(minCompetitiveScore);
|
||||||
|
if (skipUpTo == -1) { // no further skipping
|
||||||
|
target = upTo + 1;
|
||||||
|
} else if (skipUpTo == NO_MORE_DOCS) {
|
||||||
|
return NO_MORE_DOCS;
|
||||||
|
} else {
|
||||||
|
target = skipUpTo + 1;
|
||||||
|
}
|
||||||
|
upTo = advanceShallow(target);
|
||||||
|
maxScore = maxScoreCache.getMaxScoreForLevel(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
return in.advance(advanceTarget(target));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
return advance(in.docID() + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return in.docID();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return in.cost();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -22,7 +22,7 @@ import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.index.Impact;
|
import org.apache.lucene.index.Impact;
|
||||||
import org.apache.lucene.index.Impacts;
|
import org.apache.lucene.index.Impacts;
|
||||||
import org.apache.lucene.index.ImpactsEnum;
|
import org.apache.lucene.index.ImpactsSource;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
|
||||||
|
@ -30,22 +30,21 @@ import org.apache.lucene.util.ArrayUtil;
|
||||||
* Compute maximum scores based on {@link Impacts} and keep them in a cache in
|
* Compute maximum scores based on {@link Impacts} and keep them in a cache in
|
||||||
* order not to run expensive similarity score computations multiple times on
|
* order not to run expensive similarity score computations multiple times on
|
||||||
* the same data.
|
* the same data.
|
||||||
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public final class MaxScoreCache {
|
final class MaxScoreCache {
|
||||||
|
|
||||||
private final ImpactsEnum impactsEnum;
|
private final ImpactsSource impactsSource;
|
||||||
private final SimScorer scorer;
|
private final SimScorer scorer;
|
||||||
private final float globalMaxScore;
|
|
||||||
private float[] maxScoreCache;
|
private float[] maxScoreCache;
|
||||||
private int[] maxScoreCacheUpTo;
|
private int[] maxScoreCacheUpTo;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sole constructor.
|
* Sole constructor.
|
||||||
*/
|
*/
|
||||||
public MaxScoreCache(ImpactsEnum impactsEnum, SimScorer scorer) {
|
public MaxScoreCache(ImpactsSource impactsSource, SimScorer scorer) {
|
||||||
this.impactsEnum = impactsEnum;
|
this.impactsSource = impactsSource;
|
||||||
this.scorer = scorer;
|
this.scorer = scorer;
|
||||||
globalMaxScore = scorer.score(Integer.MAX_VALUE, 1L);
|
|
||||||
maxScoreCache = new float[0];
|
maxScoreCache = new float[0];
|
||||||
maxScoreCacheUpTo = new int[0];
|
maxScoreCacheUpTo = new int[0];
|
||||||
}
|
}
|
||||||
|
@ -71,8 +70,8 @@ public final class MaxScoreCache {
|
||||||
* Return the first level that includes all doc IDs up to {@code upTo},
|
* Return the first level that includes all doc IDs up to {@code upTo},
|
||||||
* or -1 if there is no such level.
|
* or -1 if there is no such level.
|
||||||
*/
|
*/
|
||||||
private int getLevel(int upTo) throws IOException {
|
int getLevel(int upTo) throws IOException {
|
||||||
final Impacts impacts = impactsEnum.getImpacts();
|
final Impacts impacts = impactsSource.getImpacts();
|
||||||
for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
|
for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
|
||||||
final int impactsUpTo = impacts.getDocIdUpTo(level);
|
final int impactsUpTo = impacts.getDocIdUpTo(level);
|
||||||
if (upTo <= impactsUpTo) {
|
if (upTo <= impactsUpTo) {
|
||||||
|
@ -86,7 +85,7 @@ public final class MaxScoreCache {
|
||||||
* Return the maximum score for the given {@code level}.
|
* Return the maximum score for the given {@code level}.
|
||||||
*/
|
*/
|
||||||
float getMaxScoreForLevel(int level) throws IOException {
|
float getMaxScoreForLevel(int level) throws IOException {
|
||||||
final Impacts impacts = impactsEnum.getImpacts();
|
final Impacts impacts = impactsSource.getImpacts();
|
||||||
ensureCacheSize(level + 1);
|
ensureCacheSize(level + 1);
|
||||||
final int levelUpTo = impacts.getDocIdUpTo(level);
|
final int levelUpTo = impacts.getDocIdUpTo(level);
|
||||||
if (maxScoreCacheUpTo[level] < levelUpTo) {
|
if (maxScoreCacheUpTo[level] < levelUpTo) {
|
||||||
|
@ -100,8 +99,7 @@ public final class MaxScoreCache {
|
||||||
* Return the maximum level at which scores are all less than {@code minScore},
|
* Return the maximum level at which scores are all less than {@code minScore},
|
||||||
* or -1 if none.
|
* or -1 if none.
|
||||||
*/
|
*/
|
||||||
int getSkipLevel(float minScore) throws IOException {
|
private int getSkipLevel(Impacts impacts, float minScore) throws IOException {
|
||||||
final Impacts impacts = impactsEnum.getImpacts();
|
|
||||||
final int numLevels = impacts.numLevels();
|
final int numLevels = impacts.numLevels();
|
||||||
for (int level = 0; level < numLevels; ++level) {
|
for (int level = 0; level < numLevels; ++level) {
|
||||||
if (getMaxScoreForLevel(level) >= minScore) {
|
if (getMaxScoreForLevel(level) >= minScore) {
|
||||||
|
@ -112,27 +110,17 @@ public final class MaxScoreCache {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implement the contract of {@link Scorer#advanceShallow(int)} based on the
|
* Return an inclusive upper bound of documents that all have a score that
|
||||||
* wrapped {@link ImpactsEnum}.
|
* is less than {@code minScore}, or {@code -1} if the current document may
|
||||||
* @see Scorer#advanceShallow(int)
|
* be competitive.
|
||||||
*/
|
*/
|
||||||
public int advanceShallow(int target) throws IOException {
|
int getSkipUpTo(float minScore) throws IOException {
|
||||||
impactsEnum.advanceShallow(target);
|
final Impacts impacts = impactsSource.getImpacts();
|
||||||
Impacts impacts = impactsEnum.getImpacts();
|
final int level = getSkipLevel(impacts, minScore);
|
||||||
return impacts.getDocIdUpTo(0);
|
if (level == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return impacts.getDocIdUpTo(level);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Implement the contract of {@link Scorer#getMaxScore(int)} based on the
|
|
||||||
* wrapped {@link ImpactsEnum} and {@link Scorer}.
|
|
||||||
* @see Scorer#getMaxScore(int)
|
|
||||||
*/
|
|
||||||
public float getMaxScore(int upTo) throws IOException {
|
|
||||||
final int level = getLevel(upTo);
|
|
||||||
if (level == -1) {
|
|
||||||
return globalMaxScore;
|
|
||||||
} else {
|
|
||||||
return getMaxScoreForLevel(level);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,12 +21,19 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Impact;
|
||||||
|
import org.apache.lucene.index.Impacts;
|
||||||
|
import org.apache.lucene.index.ImpactsEnum;
|
||||||
|
import org.apache.lucene.index.ImpactsSource;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
|
import org.apache.lucene.index.SlowImpactsEnum;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.TermStates;
|
import org.apache.lucene.index.TermStates;
|
||||||
|
@ -34,6 +41,7 @@ import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A query that treats multiple terms as synonyms.
|
* A query that treats multiple terms as synonyms.
|
||||||
|
@ -112,7 +120,7 @@ public final class SynonymQuery extends Query {
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
|
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
|
||||||
if (scoreMode.needsScores()) {
|
if (scoreMode.needsScores()) {
|
||||||
return new SynonymWeight(this, searcher, boost);
|
return new SynonymWeight(this, searcher, scoreMode, boost);
|
||||||
} else {
|
} else {
|
||||||
// if scores are not needed, let BooleanWeight deal with optimizing that case.
|
// if scores are not needed, let BooleanWeight deal with optimizing that case.
|
||||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||||
|
@ -127,9 +135,12 @@ public final class SynonymQuery extends Query {
|
||||||
private final TermStates termStates[];
|
private final TermStates termStates[];
|
||||||
private final Similarity similarity;
|
private final Similarity similarity;
|
||||||
private final Similarity.SimScorer simWeight;
|
private final Similarity.SimScorer simWeight;
|
||||||
|
private final ScoreMode scoreMode;
|
||||||
|
|
||||||
SynonymWeight(Query query, IndexSearcher searcher, float boost) throws IOException {
|
SynonymWeight(Query query, IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
|
||||||
super(query);
|
super(query);
|
||||||
|
assert scoreMode.needsScores();
|
||||||
|
this.scoreMode = scoreMode;
|
||||||
CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field());
|
CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field());
|
||||||
long docFreq = 0;
|
long docFreq = 0;
|
||||||
long totalTermFreq = 0;
|
long totalTermFreq = 0;
|
||||||
|
@ -176,8 +187,7 @@ public final class SynonymQuery extends Query {
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
final float freq;
|
final float freq;
|
||||||
if (scorer instanceof SynonymScorer) {
|
if (scorer instanceof SynonymScorer) {
|
||||||
SynonymScorer synScorer = (SynonymScorer) scorer;
|
freq = ((SynonymScorer) scorer).freq();
|
||||||
freq = synScorer.tf(synScorer.getSubMatches());
|
|
||||||
} else {
|
} else {
|
||||||
assert scorer instanceof TermScorer;
|
assert scorer instanceof TermScorer;
|
||||||
freq = ((TermScorer)scorer).freq();
|
freq = ((TermScorer)scorer).freq();
|
||||||
|
@ -197,61 +207,277 @@ public final class SynonymQuery extends Query {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
// we use termscorers + disjunction as an impl detail
|
List<PostingsEnum> iterators = new ArrayList<>();
|
||||||
List<Scorer> subScorers = new ArrayList<>();
|
List<ImpactsEnum> impacts = new ArrayList<>();
|
||||||
for (int i = 0; i < terms.length; i++) {
|
for (int i = 0; i < terms.length; i++) {
|
||||||
TermState state = termStates[i].get(context);
|
TermState state = termStates[i].get(context);
|
||||||
if (state != null) {
|
if (state != null) {
|
||||||
TermsEnum termsEnum = context.reader().terms(terms[i].field()).iterator();
|
TermsEnum termsEnum = context.reader().terms(terms[i].field()).iterator();
|
||||||
termsEnum.seekExact(terms[i].bytes(), state);
|
termsEnum.seekExact(terms[i].bytes(), state);
|
||||||
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
|
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||||
subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer));
|
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
|
||||||
}
|
iterators.add(impactsEnum);
|
||||||
}
|
impacts.add(impactsEnum);
|
||||||
if (subScorers.isEmpty()) {
|
|
||||||
return null;
|
|
||||||
} else if (subScorers.size() == 1) {
|
|
||||||
// we must optimize this case (term not in segment), disjunctionscorer requires >= 2 subs
|
|
||||||
return subScorers.get(0);
|
|
||||||
} else {
|
} else {
|
||||||
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
|
PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS);
|
||||||
return new SynonymScorer(simScorer, this, subScorers);
|
iterators.add(postingsEnum);
|
||||||
|
impacts.add(new SlowImpactsEnum(postingsEnum));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iterators.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
|
||||||
|
|
||||||
|
// we must optimize this case (term not in segment), disjunctions require >= 2 subs
|
||||||
|
if (iterators.size() == 1) {
|
||||||
|
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||||
|
return new TermScorer(this, impacts.get(0), simScorer);
|
||||||
|
} else {
|
||||||
|
return new TermScorer(this, iterators.get(0), simScorer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we use termscorers + disjunction as an impl detail
|
||||||
|
DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
|
||||||
|
for (PostingsEnum postings : iterators) {
|
||||||
|
queue.add(new DisiWrapper(new TermScorer(this, postings, simScorer)));
|
||||||
|
}
|
||||||
|
// Even though it is called approximation, it is accurate since none of
|
||||||
|
// the sub iterators are two-phase iterators.
|
||||||
|
DocIdSetIterator iterator = new DisjunctionDISIApproximation(queue);
|
||||||
|
|
||||||
|
ImpactsSource impactsSource = mergeImpacts(impacts.toArray(new ImpactsEnum[0]));
|
||||||
|
ImpactsDISI impactsDisi = new ImpactsDISI(iterator, impactsSource, simScorer.getSimScorer());
|
||||||
|
|
||||||
|
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||||
|
iterator = impactsDisi;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new SynonymScorer(this, queue, iterator, impactsDisi, simScorer);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isCacheable(LeafReaderContext ctx) {
|
public boolean isCacheable(LeafReaderContext ctx) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static class SynonymScorer extends DisjunctionScorer {
|
/**
|
||||||
private final LeafSimScorer similarity;
|
* Merge impacts for multiple synonyms.
|
||||||
|
*/
|
||||||
|
static ImpactsSource mergeImpacts(ImpactsEnum[] impactsEnums) {
|
||||||
|
return new ImpactsSource() {
|
||||||
|
|
||||||
|
class SubIterator {
|
||||||
|
final Iterator<Impact> iterator;
|
||||||
|
int previousFreq;
|
||||||
|
Impact current;
|
||||||
|
|
||||||
|
SubIterator(Iterator<Impact> iterator) {
|
||||||
|
this.iterator = iterator;
|
||||||
|
this.current = iterator.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
void next() {
|
||||||
|
previousFreq = current.freq;
|
||||||
|
if (iterator.hasNext() == false) {
|
||||||
|
current = null;
|
||||||
|
} else {
|
||||||
|
current = iterator.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
SynonymScorer(LeafSimScorer similarity, Weight weight, List<Scorer> subScorers) {
|
|
||||||
super(weight, subScorers, true);
|
|
||||||
this.similarity = similarity;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected float score(DisiWrapper topList) throws IOException {
|
public Impacts getImpacts() throws IOException {
|
||||||
return similarity.score(topList.doc, tf(topList));
|
final Impacts[] impacts = new Impacts[impactsEnums.length];
|
||||||
|
// Use the impacts that have the lower next boundary as a lead.
|
||||||
|
// It will decide on the number of levels and the block boundaries.
|
||||||
|
Impacts tmpLead = null;
|
||||||
|
for (int i = 0; i < impactsEnums.length; ++i) {
|
||||||
|
impacts[i] = impactsEnums[i].getImpacts();
|
||||||
|
if (tmpLead == null || impacts[i].getDocIdUpTo(0) < tmpLead.getDocIdUpTo(0)) {
|
||||||
|
tmpLead = impacts[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
final Impacts lead = tmpLead;
|
||||||
|
return new Impacts() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int numLevels() {
|
||||||
|
// Delegate to the lead
|
||||||
|
return lead.numLevels();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getDocIdUpTo(int level) {
|
||||||
|
// Delegate to the lead
|
||||||
|
return lead.getDocIdUpTo(level);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the minimum level whose impacts are valid up to {@code docIdUpTo},
|
||||||
|
* or {@code -1} if there is no such level.
|
||||||
|
*/
|
||||||
|
private int getLevel(Impacts impacts, int docIdUpTo) {
|
||||||
|
for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
|
||||||
|
if (impacts.getDocIdUpTo(level) >= docIdUpTo) {
|
||||||
|
return level;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Impact> getImpacts(int level) {
|
||||||
|
final int docIdUpTo = getDocIdUpTo(level);
|
||||||
|
|
||||||
|
List<List<Impact>> toMerge = new ArrayList<>();
|
||||||
|
|
||||||
|
for (int i = 0; i < impactsEnums.length; ++i) {
|
||||||
|
if (impactsEnums[i].docID() <= docIdUpTo) {
|
||||||
|
int impactsLevel = getLevel(impacts[i], docIdUpTo);
|
||||||
|
if (impactsLevel == -1) {
|
||||||
|
// One instance doesn't have impacts that cover up to docIdUpTo
|
||||||
|
// Return impacts that trigger the maximum score
|
||||||
|
return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
|
||||||
|
}
|
||||||
|
toMerge.add(impacts[i].getImpacts(impactsLevel));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert toMerge.size() > 0; // otherwise it would mean the docID is > docIdUpTo, which is wrong
|
||||||
|
|
||||||
|
if (toMerge.size() == 1) {
|
||||||
|
// common if one synonym is common and the other one is rare
|
||||||
|
return toMerge.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
PriorityQueue<SubIterator> pq = new PriorityQueue<SubIterator>(impacts.length) {
|
||||||
|
@Override
|
||||||
|
protected boolean lessThan(SubIterator a, SubIterator b) {
|
||||||
|
if (a.current == null) { // means iteration is finished
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (b.current == null) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return Long.compareUnsigned(a.current.norm, b.current.norm) < 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for (List<Impact> impacts : toMerge) {
|
||||||
|
pq.add(new SubIterator(impacts.iterator()));
|
||||||
|
}
|
||||||
|
|
||||||
|
List<Impact> mergedImpacts = new ArrayList<>();
|
||||||
|
|
||||||
|
// Idea: merge impacts by norm. The tricky thing is that we need to
|
||||||
|
// consider norm values that are not in the impacts too. For
|
||||||
|
// instance if the list of impacts is [{freq=2,norm=10}, {freq=4,norm=12}],
|
||||||
|
// there might well be a document that has a freq of 2 and a length of 11,
|
||||||
|
// which was just not added to the list of impacts because {freq=2,norm=10}
|
||||||
|
// is more competitive. So the way it works is that we track the sum of
|
||||||
|
// the term freqs that we have seen so far in order to account for these
|
||||||
|
// implicit impacts.
|
||||||
|
|
||||||
|
long sumTf = 0;
|
||||||
|
SubIterator top = pq.top();
|
||||||
|
do {
|
||||||
|
final long norm = top.current.norm;
|
||||||
|
do {
|
||||||
|
sumTf += top.current.freq - top.previousFreq;
|
||||||
|
top.next();
|
||||||
|
top = pq.updateTop();
|
||||||
|
} while (top.current != null && top.current.norm == norm);
|
||||||
|
|
||||||
|
final int freqUpperBound = (int) Math.min(Integer.MAX_VALUE, sumTf);
|
||||||
|
if (mergedImpacts.isEmpty()) {
|
||||||
|
mergedImpacts.add(new Impact(freqUpperBound, norm));
|
||||||
|
} else {
|
||||||
|
Impact prevImpact = mergedImpacts.get(mergedImpacts.size() - 1);
|
||||||
|
assert Long.compareUnsigned(prevImpact.norm, norm) < 0;
|
||||||
|
if (freqUpperBound > prevImpact.freq) {
|
||||||
|
mergedImpacts.add(new Impact(freqUpperBound, norm));
|
||||||
|
} // otherwise the previous impact is already more competitive
|
||||||
|
}
|
||||||
|
} while (top.current != null);
|
||||||
|
|
||||||
|
return mergedImpacts;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void advanceShallow(int target) throws IOException {
|
||||||
|
for (ImpactsEnum impactsEnum : impactsEnums) {
|
||||||
|
if (impactsEnum.docID() < target) {
|
||||||
|
impactsEnum.advanceShallow(target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class SynonymScorer extends Scorer {
|
||||||
|
|
||||||
|
private final DisiPriorityQueue queue;
|
||||||
|
private final DocIdSetIterator iterator;
|
||||||
|
private final ImpactsDISI impactsDisi;
|
||||||
|
private final LeafSimScorer simScorer;
|
||||||
|
|
||||||
|
/**
 * Creates a scorer over the postings of several synonym terms.
 *
 * @param weight      handed to the {@code Scorer} super constructor
 * @param queue       queue whose top list is summed by {@code freq()}
 * @param iterator    iterator reported by {@code iterator()} and read by {@code docID()}
 * @param impactsDisi impacts-based iterator kept for max-score related calls
 * @param simScorer   scorer applied to the current doc ID and combined freq
 */
SynonymScorer(Weight weight, DisiPriorityQueue queue, DocIdSetIterator iterator,
    ImpactsDISI impactsDisi, LeafSimScorer simScorer) {
  super(weight);
  this.simScorer = simScorer;
  this.impactsDisi = impactsDisi;
  this.iterator = iterator;
  this.queue = queue;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return iterator.docID();
|
||||||
|
}
|
||||||
|
|
||||||
|
int freq() throws IOException {
|
||||||
|
DisiWrapper w = queue.topList();
|
||||||
|
int freq = ((PostingsEnum) w.iterator).freq();
|
||||||
|
for (w = w.next; w != null; w = w.next) {
|
||||||
|
freq += ((PostingsEnum) w.iterator).freq();
|
||||||
|
if (freq < 0) { // overflow
|
||||||
|
return Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float score() throws IOException {
|
||||||
|
return simScorer.score(iterator.docID(), freq());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
return iterator;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getMaxScore(int upTo) throws IOException {
|
public float getMaxScore(int upTo) throws IOException {
|
||||||
// TODO: merge impacts to get better score upper bounds
|
return impactsDisi.getMaxScore(upTo);
|
||||||
return similarity.getSimScorer().score(Float.MAX_VALUE, 1L);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** combines TF of all subs. */
|
@Override
|
||||||
final int tf(DisiWrapper topList) throws IOException {
|
public int advanceShallow(int target) throws IOException {
|
||||||
int tf = 0;
|
return impactsDisi.advanceShallow(target);
|
||||||
for (DisiWrapper w = topList; w != null; w = w.next) {
|
|
||||||
tf += ((TermScorer)w.scorer).freq();
|
|
||||||
}
|
}
|
||||||
return tf;
|
|
||||||
|
@Override
|
||||||
|
public void setMinCompetitiveScore(float minScore) {
|
||||||
|
impactsDisi.setMinCompetitiveScore(minScore);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -111,7 +111,11 @@ public class TermQuery extends Query {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores());
|
LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores());
|
||||||
return new TermScorer(this, termsEnum, scoreMode, scorer);
|
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||||
|
return new TermScorer(this, termsEnum.impacts(PostingsEnum.FREQS), scorer);
|
||||||
|
} else {
|
||||||
|
return new TermScorer(this, termsEnum.postings(null, PostingsEnum.FREQS), scorer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,11 +19,9 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.Impacts;
|
|
||||||
import org.apache.lucene.index.ImpactsEnum;
|
import org.apache.lucene.index.ImpactsEnum;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.SlowImpactsEnum;
|
import org.apache.lucene.index.SlowImpactsEnum;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
|
||||||
|
|
||||||
/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
|
/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
|
||||||
*/
|
*/
|
||||||
|
@ -32,101 +30,29 @@ final class TermScorer extends Scorer {
|
||||||
private final ImpactsEnum impactsEnum;
|
private final ImpactsEnum impactsEnum;
|
||||||
private final DocIdSetIterator iterator;
|
private final DocIdSetIterator iterator;
|
||||||
private final LeafSimScorer docScorer;
|
private final LeafSimScorer docScorer;
|
||||||
private final MaxScoreCache maxScoreCache;
|
private final ImpactsDISI impactsDisi;
|
||||||
private float minCompetitiveScore;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a <code>TermScorer</code>.
|
* Construct a {@link TermScorer} that will iterate all documents.
|
||||||
*
|
|
||||||
* @param weight
|
|
||||||
* The weight of the <code>Term</code> in the query.
|
|
||||||
* @param te
|
|
||||||
* A {@link TermsEnum} positioned on the expected term.
|
|
||||||
* @param docScorer
|
|
||||||
* A {@link LeafSimScorer} for the appropriate field.
|
|
||||||
*/
|
*/
|
||||||
TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException {
|
TermScorer(Weight weight, PostingsEnum postingsEnum, LeafSimScorer docScorer) {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = docScorer;
|
iterator = this.postingsEnum = postingsEnum;
|
||||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
|
||||||
impactsEnum = te.impacts(PostingsEnum.FREQS);
|
|
||||||
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
|
|
||||||
postingsEnum = impactsEnum;
|
|
||||||
iterator = new DocIdSetIterator() {
|
|
||||||
|
|
||||||
int upTo = -1;
|
|
||||||
float maxScore;
|
|
||||||
|
|
||||||
private int advanceTarget(int target) throws IOException {
|
|
||||||
if (minCompetitiveScore == 0) {
|
|
||||||
// no potential for skipping
|
|
||||||
return target;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (target > upTo) {
|
|
||||||
impactsEnum.advanceShallow(target);
|
|
||||||
Impacts impacts = impactsEnum.getImpacts();
|
|
||||||
upTo = impacts.getDocIdUpTo(0);
|
|
||||||
maxScore = maxScoreCache.getMaxScoreForLevel(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
assert upTo >= target;
|
|
||||||
|
|
||||||
if (maxScore >= minCompetitiveScore) {
|
|
||||||
return target;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (upTo == NO_MORE_DOCS) {
|
|
||||||
return NO_MORE_DOCS;
|
|
||||||
}
|
|
||||||
|
|
||||||
impactsEnum.advanceShallow(upTo + 1);
|
|
||||||
Impacts impacts = impactsEnum.getImpacts();
|
|
||||||
final int level = maxScoreCache.getSkipLevel(minCompetitiveScore);
|
|
||||||
if (level >= 0) {
|
|
||||||
// we can skip more docs
|
|
||||||
int newUpTo = impacts.getDocIdUpTo(level);
|
|
||||||
if (newUpTo == NO_MORE_DOCS) {
|
|
||||||
return NO_MORE_DOCS;
|
|
||||||
}
|
|
||||||
target = newUpTo + 1;
|
|
||||||
impactsEnum.advanceShallow(target);
|
|
||||||
impacts = impactsEnum.getImpacts();
|
|
||||||
} else {
|
|
||||||
target = upTo + 1;
|
|
||||||
}
|
|
||||||
upTo = impacts.getDocIdUpTo(0);
|
|
||||||
maxScore = maxScoreCache.getMaxScoreForLevel(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int advance(int target) throws IOException {
|
|
||||||
return impactsEnum.advance(advanceTarget(target));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int nextDoc() throws IOException {
|
|
||||||
return advance(impactsEnum.docID() + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int docID() {
|
|
||||||
return impactsEnum.docID();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return impactsEnum.cost();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE);
|
|
||||||
impactsEnum = new SlowImpactsEnum(postingsEnum);
|
impactsEnum = new SlowImpactsEnum(postingsEnum);
|
||||||
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
|
impactsDisi = new ImpactsDISI(impactsEnum, impactsEnum, docScorer.getSimScorer());
|
||||||
iterator = postingsEnum;
|
this.docScorer = docScorer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a {@link TermScorer} that will use impacts to skip blocks of
|
||||||
|
* non-competitive documents.
|
||||||
|
*/
|
||||||
|
TermScorer(Weight weight, ImpactsEnum impactsEnum, LeafSimScorer docScorer) {
|
||||||
|
super(weight);
|
||||||
|
postingsEnum = this.impactsEnum = impactsEnum;
|
||||||
|
impactsDisi = new ImpactsDISI(impactsEnum, impactsEnum, docScorer.getSimScorer());
|
||||||
|
iterator = impactsDisi;
|
||||||
|
this.docScorer = docScorer;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -151,17 +77,17 @@ final class TermScorer extends Scorer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int advanceShallow(int target) throws IOException {
|
public int advanceShallow(int target) throws IOException {
|
||||||
return maxScoreCache.advanceShallow(target);
|
return impactsDisi.advanceShallow(target);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getMaxScore(int upTo) throws IOException {
|
public float getMaxScore(int upTo) throws IOException {
|
||||||
return maxScoreCache.getMaxScore(upTo);
|
return impactsDisi.getMaxScore(upTo);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setMinCompetitiveScore(float minScore) {
|
public void setMinCompetitiveScore(float minScore) {
|
||||||
this.minCompetitiveScore = minScore;
|
impactsDisi.setMinCompetitiveScore(minScore);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns a string representation of this <code>TermScorer</code>. */
|
/** Returns a string representation of this <code>TermScorer</code>. */
|
||||||
|
|
|
@ -18,15 +18,27 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.Impact;
|
||||||
|
import org.apache.lucene.index.Impacts;
|
||||||
|
import org.apache.lucene.index.ImpactsEnum;
|
||||||
|
import org.apache.lucene.index.ImpactsSource;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestSynonymQuery extends LuceneTestCase {
|
public class TestSynonymQuery extends LuceneTestCase {
|
||||||
|
|
||||||
|
@ -54,6 +66,11 @@ public class TestSynonymQuery extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testScores() throws IOException {
|
public void testScores() throws IOException {
|
||||||
|
doTestScores(false);
|
||||||
|
doTestScores(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doTestScores(boolean trackTotalHits) throws IOException {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
|
||||||
|
@ -71,8 +88,12 @@ public class TestSynonymQuery extends LuceneTestCase {
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
SynonymQuery query = new SynonymQuery(new Term("f", "a"), new Term("f", "b"));
|
SynonymQuery query = new SynonymQuery(new Term("f", "a"), new Term("f", "b"));
|
||||||
|
|
||||||
TopDocs topDocs = searcher.search(query, 20);
|
TopScoreDocCollector collector = TopScoreDocCollector.create(20, null, trackTotalHits);
|
||||||
|
searcher.search(query, collector);
|
||||||
|
TopDocs topDocs = collector.topDocs();
|
||||||
|
if (trackTotalHits) {
|
||||||
assertEquals(11, topDocs.totalHits);
|
assertEquals(11, topDocs.totalHits);
|
||||||
|
}
|
||||||
// All docs must have the same score
|
// All docs must have the same score
|
||||||
for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
|
for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
|
||||||
assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);
|
assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);
|
||||||
|
@ -83,4 +104,206 @@ public class TestSynonymQuery extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMergeImpacts() throws IOException {
|
||||||
|
DummyImpactsEnum impacts1 = new DummyImpactsEnum();
|
||||||
|
impacts1.reset(42,
|
||||||
|
new Impact[][] {
|
||||||
|
new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) },
|
||||||
|
new Impact[] { new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) }
|
||||||
|
},
|
||||||
|
new int[] {
|
||||||
|
110,
|
||||||
|
945
|
||||||
|
});
|
||||||
|
DummyImpactsEnum impacts2 = new DummyImpactsEnum();
|
||||||
|
impacts2.reset(45,
|
||||||
|
new Impact[][] {
|
||||||
|
new Impact[] { new Impact(2, 10), new Impact(6, 13) },
|
||||||
|
new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
|
||||||
|
},
|
||||||
|
new int[] {
|
||||||
|
90,
|
||||||
|
1000
|
||||||
|
});
|
||||||
|
|
||||||
|
ImpactsSource mergedImpacts = SynonymQuery.mergeImpacts(new ImpactsEnum[] { impacts1, impacts2 });
|
||||||
|
assertEquals(
|
||||||
|
new Impact[][] {
|
||||||
|
new Impact[] { new Impact(5, 10), new Impact(7, 12), new Impact(14, 13) },
|
||||||
|
new Impact[] { new Impact(Integer.MAX_VALUE, 1) }
|
||||||
|
},
|
||||||
|
new int[] {
|
||||||
|
90,
|
||||||
|
1000
|
||||||
|
},
|
||||||
|
mergedImpacts.getImpacts());
|
||||||
|
|
||||||
|
// docID is > the first doIdUpTo of impacts1
|
||||||
|
impacts2.reset(112,
|
||||||
|
new Impact[][] {
|
||||||
|
new Impact[] { new Impact(2, 10), new Impact(6, 13) },
|
||||||
|
new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
|
||||||
|
},
|
||||||
|
new int[] {
|
||||||
|
150,
|
||||||
|
1000
|
||||||
|
});
|
||||||
|
assertEquals(
|
||||||
|
new Impact[][] {
|
||||||
|
new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) }, // same as impacts1
|
||||||
|
new Impact[] { new Impact(3, 9), new Impact(10, 11), new Impact(15, 13), new Impact(19, 14) }
|
||||||
|
},
|
||||||
|
new int[] {
|
||||||
|
110,
|
||||||
|
945
|
||||||
|
},
|
||||||
|
mergedImpacts.getImpacts());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertEquals(Impact[][] impacts, int[] docIdUpTo, Impacts actual) {
|
||||||
|
assertEquals(impacts.length, actual.numLevels());
|
||||||
|
for (int i = 0; i < impacts.length; ++i) {
|
||||||
|
assertEquals(docIdUpTo[i], actual.getDocIdUpTo(i));
|
||||||
|
assertEquals(Arrays.asList(impacts[i]), actual.getImpacts(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class DummyImpactsEnum extends ImpactsEnum {
|
||||||
|
|
||||||
|
private int docID;
|
||||||
|
private Impact[][] impacts;
|
||||||
|
private int[] docIdUpTo;
|
||||||
|
|
||||||
|
void reset(int docID, Impact[][] impacts, int[] docIdUpTo) {
|
||||||
|
this.docID = docID;
|
||||||
|
this.impacts = impacts;
|
||||||
|
this.docIdUpTo = docIdUpTo;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void advanceShallow(int target) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Impacts getImpacts() throws IOException {
|
||||||
|
return new Impacts() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int numLevels() {
|
||||||
|
return impacts.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getDocIdUpTo(int level) {
|
||||||
|
return docIdUpTo[level];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Impact> getImpacts(int level) {
|
||||||
|
return Arrays.asList(impacts[level]);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int freq() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextPosition() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int startOffset() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int endOffset() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef getPayload() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRandomTopDocs() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||||
|
int numDocs = atLeast(128 * 8 * 8 * 3); // make sure some terms have skip data
|
||||||
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
|
Document doc = new Document();
|
||||||
|
int numValues = random().nextInt(1 << random().nextInt(5));
|
||||||
|
int start = random().nextInt(10);
|
||||||
|
for (int j = 0; j < numValues; ++j) {
|
||||||
|
int freq = TestUtil.nextInt(random(), 1, 1 << random().nextInt(3));
|
||||||
|
for (int k = 0; k < freq; ++k) {
|
||||||
|
doc.add(new TextField("foo", Integer.toString(start + j), Store.NO));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
IndexReader reader = DirectoryReader.open(w);
|
||||||
|
w.close();
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
for (int term1 = 0; term1 < 15; ++term1) {
|
||||||
|
int term2;
|
||||||
|
do {
|
||||||
|
term2 = random().nextInt(15);
|
||||||
|
} while (term1 == term2);
|
||||||
|
Query query = new SynonymQuery(
|
||||||
|
new Term[] {
|
||||||
|
new Term("foo", Integer.toString(term1)),
|
||||||
|
new Term("foo", Integer.toString(term2))});
|
||||||
|
|
||||||
|
TopScoreDocCollector collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE
|
||||||
|
TopScoreDocCollector collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
|
||||||
|
|
||||||
|
searcher.search(query, collector1);
|
||||||
|
searcher.search(query, collector2);
|
||||||
|
CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||||
|
|
||||||
|
int filterTerm = random().nextInt(15);
|
||||||
|
Query filteredQuery = new BooleanQuery.Builder()
|
||||||
|
.add(query, Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE
|
||||||
|
collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
|
||||||
|
searcher.search(filteredQuery, collector1);
|
||||||
|
searcher.search(filteredQuery, collector2);
|
||||||
|
CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||||
|
}
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ import java.util.List;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.FilterLeafReader;
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
|
@ -39,6 +39,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestTermScorer extends LuceneTestCase {
|
public class TestTermScorer extends LuceneTestCase {
|
||||||
protected Directory directory;
|
protected Directory directory;
|
||||||
|
@ -218,7 +219,10 @@ public class TestTermScorer extends LuceneTestCase {
|
||||||
int numValues = random().nextInt(1 << random().nextInt(5));
|
int numValues = random().nextInt(1 << random().nextInt(5));
|
||||||
int start = random().nextInt(10);
|
int start = random().nextInt(10);
|
||||||
for (int j = 0; j < numValues; ++j) {
|
for (int j = 0; j < numValues; ++j) {
|
||||||
doc.add(new StringField("foo", Integer.toString(start + j), Store.NO));
|
int freq = TestUtil.nextInt(random(), 1, 1 << random().nextInt(3));
|
||||||
|
for (int k = 0; k < freq; ++k) {
|
||||||
|
doc.add(new TextField("foo", Integer.toString(start + j), Store.NO));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
@ -234,7 +238,7 @@ public class TestTermScorer extends LuceneTestCase {
|
||||||
|
|
||||||
searcher.search(query, collector1);
|
searcher.search(query, collector1);
|
||||||
searcher.search(query, collector2);
|
searcher.search(query, collector2);
|
||||||
assertTopDocsEquals(collector1.topDocs(), collector2.topDocs());
|
CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||||
|
|
||||||
int filterTerm = random().nextInt(15);
|
int filterTerm = random().nextInt(15);
|
||||||
Query filteredQuery = new BooleanQuery.Builder()
|
Query filteredQuery = new BooleanQuery.Builder()
|
||||||
|
@ -246,19 +250,10 @@ public class TestTermScorer extends LuceneTestCase {
|
||||||
collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
|
collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
|
||||||
searcher.search(filteredQuery, collector1);
|
searcher.search(filteredQuery, collector1);
|
||||||
searcher.search(filteredQuery, collector2);
|
searcher.search(filteredQuery, collector2);
|
||||||
assertTopDocsEquals(collector1.topDocs(), collector2.topDocs());
|
CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||||
}
|
}
|
||||||
reader.close();
|
reader.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void assertTopDocsEquals(TopDocs td1, TopDocs td2) {
|
|
||||||
assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
|
|
||||||
for (int i = 0; i < td1.scoreDocs.length; ++i) {
|
|
||||||
ScoreDoc sd1 = td1.scoreDocs[i];
|
|
||||||
ScoreDoc sd2 = td2.scoreDocs[i];
|
|
||||||
assertEquals(sd1.doc, sd2.doc);
|
|
||||||
assertEquals(sd1.score, sd2.score, 0f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue