mirror of https://github.com/apache/lucene.git
LUCENE-124: Add a topterms rewrite method that scores terms only by boost value
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@920499 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d9ff09b759
commit
8c0dcbb8c3
|
@ -77,6 +77,12 @@ API Changes
|
||||||
callers can use to query whether a certain file matches a certain extension.
|
callers can use to query whether a certain file matches a certain extension.
|
||||||
(Shai Erera via Mike McCandless)
|
(Shai Erera via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-124: Add a TopTermsBoostOnlyBooleanQueryRewrite to MultiTermQuery.
|
||||||
|
This rewrite method is similar to TopTermsScoringBooleanQueryRewrite, but
|
||||||
|
only scores terms by their boost values. For example, this can be used
|
||||||
|
with FuzzyQuery to ensure that exact matches are always scored higher,
|
||||||
|
because only the boost will be used in scoring. (Robert Muir)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-2119: Don't throw NegativeArraySizeException if you pass
|
* LUCENE-2119: Don't throw NegativeArraySizeException if you pass
|
||||||
|
|
|
@ -162,38 +162,36 @@ public abstract class MultiTermQuery extends Query {
|
||||||
* @see #setRewriteMethod */
|
* @see #setRewriteMethod */
|
||||||
public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite();
|
public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite();
|
||||||
|
|
||||||
/** A rewrite method that first translates each term into
|
|
||||||
* {@link BooleanClause.Occur#SHOULD} clause in a
|
/**
|
||||||
* BooleanQuery, and keeps the scores as computed by the
|
* Base rewrite method for collecting only the top terms
|
||||||
* query.
|
* via a priority queue.
|
||||||
*
|
*/
|
||||||
* <p>This rewrite mode only uses the top scoring terms
|
public static abstract class TopTermsBooleanQueryRewrite extends BooleanQueryRewrite {
|
||||||
* so it will not overflow the boolean max clause count.
|
|
||||||
* It is the default rewrite mode for {@link FuzzyQuery}.
|
|
||||||
*
|
|
||||||
* @see #setRewriteMethod */
|
|
||||||
public static final class TopTermsScoringBooleanQueryRewrite extends BooleanQueryRewrite {
|
|
||||||
private final int size;
|
private final int size;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a TopTermsScoringBooleanQueryRewrite for
|
* Create a TopTermsBooleanQueryRewrite for
|
||||||
* at most <code>size</code> terms.
|
* at most <code>size</code> terms.
|
||||||
* <p>
|
* <p>
|
||||||
* NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
|
* NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
|
||||||
* <code>size</code>, then it will be used instead.
|
* <code>size</code>, then it will be used instead.
|
||||||
*/
|
*/
|
||||||
public TopTermsScoringBooleanQueryRewrite(int size) {
|
public TopTermsBooleanQueryRewrite(int size) {
|
||||||
this.size = size;
|
this.size = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a TopTermsScoringBooleanQueryRewrite that is limited
|
* Create a TopTermsBooleanQueryRewrite that is limited
|
||||||
* to at most {@link BooleanQuery#getMaxClauseCount} terms.
|
* to at most {@link BooleanQuery#getMaxClauseCount} terms.
|
||||||
*/
|
*/
|
||||||
public TopTermsScoringBooleanQueryRewrite() {
|
public TopTermsBooleanQueryRewrite() {
|
||||||
this(Integer.MAX_VALUE);
|
this(Integer.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Return a suitable Query for a MultiTermQuery term. */
|
||||||
|
protected abstract Query getQuery(Term term);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
||||||
final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount());
|
final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount());
|
||||||
|
@ -218,7 +216,7 @@ public abstract class MultiTermQuery extends Query {
|
||||||
|
|
||||||
final BooleanQuery bq = new BooleanQuery(true);
|
final BooleanQuery bq = new BooleanQuery(true);
|
||||||
for (final ScoreTerm st : stQueue) {
|
for (final ScoreTerm st : stQueue) {
|
||||||
TermQuery tq = new TermQuery(st.term); // found a match
|
Query tq = getQuery(st.term); // found a match
|
||||||
tq.setBoost(query.getBoost() * st.boost); // set the boost
|
tq.setBoost(query.getBoost() * st.boost); // set the boost
|
||||||
bq.add(tq, BooleanClause.Occur.SHOULD); // add to query
|
bq.add(tq, BooleanClause.Occur.SHOULD); // add to query
|
||||||
}
|
}
|
||||||
|
@ -239,7 +237,7 @@ public abstract class MultiTermQuery extends Query {
|
||||||
if (this == obj) return true;
|
if (this == obj) return true;
|
||||||
if (obj == null) return false;
|
if (obj == null) return false;
|
||||||
if (getClass() != obj.getClass()) return false;
|
if (getClass() != obj.getClass()) return false;
|
||||||
TopTermsScoringBooleanQueryRewrite other = (TopTermsScoringBooleanQueryRewrite) obj;
|
TopTermsBooleanQueryRewrite other = (TopTermsBooleanQueryRewrite) obj;
|
||||||
if (size != other.size) return false;
|
if (size != other.size) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -257,6 +255,84 @@ public abstract class MultiTermQuery extends Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A rewrite method that first translates each term into a
|
||||||
|
* {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the
|
||||||
|
* scores as computed by the query.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* This rewrite mode only uses the top scoring terms so it will not overflow
|
||||||
|
* the boolean max clause count. It is the default rewrite mode for
|
||||||
|
* {@link FuzzyQuery}.
|
||||||
|
*
|
||||||
|
* @see #setRewriteMethod
|
||||||
|
*/
|
||||||
|
public static final class TopTermsScoringBooleanQueryRewrite extends
|
||||||
|
TopTermsBooleanQueryRewrite {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a TopTermsScoringBooleanQueryRewrite that is limited
|
||||||
|
* to at most {@link BooleanQuery#getMaxClauseCount} terms.
|
||||||
|
*/
|
||||||
|
public TopTermsScoringBooleanQueryRewrite() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a TopTermsScoringBooleanQueryRewrite for
|
||||||
|
* at most <code>size</code> terms.
|
||||||
|
* <p>
|
||||||
|
* NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
|
||||||
|
* <code>size</code>, then it will be used instead.
|
||||||
|
*/
|
||||||
|
public TopTermsScoringBooleanQueryRewrite(int size) {
|
||||||
|
super(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query getQuery(Term term) {
|
||||||
|
return new TermQuery(term);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A rewrite method that first translates each term into a
|
||||||
|
* {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, but the scores
|
||||||
|
* are only computed as the boost.
|
||||||
|
* <p>
|
||||||
|
* This rewrite method only uses the top scoring terms so it will not overflow
|
||||||
|
* the boolean max clause count.
|
||||||
|
*
|
||||||
|
* @see #setRewriteMethod
|
||||||
|
*/
|
||||||
|
public static final class TopTermsBoostOnlyBooleanQueryRewrite extends
|
||||||
|
TopTermsBooleanQueryRewrite {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a TopTermsBoostOnlyBooleanQueryRewrite that is limited
|
||||||
|
* to at most {@link BooleanQuery#getMaxClauseCount} terms.
|
||||||
|
*/
|
||||||
|
public TopTermsBoostOnlyBooleanQueryRewrite() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a TopTermsBoostOnlyBooleanQueryRewrite for
|
||||||
|
* at most <code>size</code> terms.
|
||||||
|
* <p>
|
||||||
|
* NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
|
||||||
|
* <code>size</code>, then it will be used instead.
|
||||||
|
*/
|
||||||
|
public TopTermsBoostOnlyBooleanQueryRewrite(int size) {
|
||||||
|
super(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query getQuery(Term term) {
|
||||||
|
return new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable {
|
private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable {
|
||||||
@Override
|
@Override
|
||||||
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
||||||
|
|
|
@ -311,6 +311,30 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
||||||
assertEquals(0, hits.length);
|
assertEquals(0, hits.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */
|
||||||
|
public void testBoostOnlyRewrite() throws Exception {
|
||||||
|
RAMDirectory directory = new RAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
|
||||||
|
true, IndexWriter.MaxFieldLength.LIMITED);
|
||||||
|
addDoc("Lucene", writer);
|
||||||
|
addDoc("Lucene", writer);
|
||||||
|
addDoc("Lucenne", writer);
|
||||||
|
writer.optimize();
|
||||||
|
writer.close();
|
||||||
|
IndexSearcher searcher = new IndexSearcher(directory, true);
|
||||||
|
IndexReader reader = searcher.getIndexReader();
|
||||||
|
FuzzyQuery query = new FuzzyQuery(new Term("field", "Lucene"));
|
||||||
|
query.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite());
|
||||||
|
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||||
|
assertEquals(3, hits.length);
|
||||||
|
// normally, 'Lucenne' would be the first result as IDF will skew the score.
|
||||||
|
assertEquals("Lucene", reader.document(hits[0].doc).get("field"));
|
||||||
|
assertEquals("Lucene", reader.document(hits[1].doc).get("field"));
|
||||||
|
assertEquals("Lucenne", reader.document(hits[2].doc).get("field"));
|
||||||
|
searcher.close();
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void testGiga() throws Exception {
|
public void testGiga() throws Exception {
|
||||||
|
|
||||||
StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
|
StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
|
||||||
|
|
Loading…
Reference in New Issue