Patch #33472. Disable coord() in automatically generated queries.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@156438 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2005-03-07 19:26:27 +00:00
parent a606c0890c
commit 4d1970ebab
13 changed files with 157 additions and 18 deletions

View File

@ -116,6 +116,15 @@ Bug fixes
5. Small bug in skipTo of ConjunctionScorer that caused NullPointerException 5. Small bug in skipTo of ConjunctionScorer that caused NullPointerException
if skipTo() was called without prior call to next() fixed. (Christoph) if skipTo() was called without prior call to next() fixed. (Christoph)
6. Disable Similarity.coord() in the scoring of most automatically
generated boolean queries. The coord() score factor is
appropriate when clauses are independently specified by a user,
but is usually not appropriate when clauses are generated
automatically, e.g., by a fuzzy, wildcard or range query. Matches
on such automatically generated queries are no longer penalized
for not matching all terms. (Doug Cutting, Patch #33472)
Optimizations Optimizations
1. Disk usage (peak requirements during indexing and optimization) 1. Disk usage (peak requirements during indexing and optimization)

View File

@ -63,7 +63,7 @@ public class MultiFieldQueryParser extends QueryParser
for (int i = 0; i < fields.length; i++) for (int i = 0; i < fields.length; i++)
clauses.add(new BooleanClause(super.getFieldQuery(fields[i], queryText), clauses.add(new BooleanClause(super.getFieldQuery(fields[i], queryText),
BooleanClause.Occur.SHOULD)); BooleanClause.Occur.SHOULD));
return getBooleanQuery(clauses); return getBooleanQuery(clauses, true);
} }
return super.getFieldQuery(field, queryText); return super.getFieldQuery(field, queryText);
} }
@ -95,7 +95,7 @@ public class MultiFieldQueryParser extends QueryParser
clauses.add(new BooleanClause(super.getFuzzyQuery(fields[i], termStr, minSimilarity), clauses.add(new BooleanClause(super.getFuzzyQuery(fields[i], termStr, minSimilarity),
BooleanClause.Occur.SHOULD)); BooleanClause.Occur.SHOULD));
} }
return getBooleanQuery(clauses); return getBooleanQuery(clauses, true);
} }
return super.getFuzzyQuery(field, termStr, minSimilarity); return super.getFuzzyQuery(field, termStr, minSimilarity);
} }
@ -108,7 +108,7 @@ public class MultiFieldQueryParser extends QueryParser
clauses.add(new BooleanClause(super.getPrefixQuery(fields[i], termStr), clauses.add(new BooleanClause(super.getPrefixQuery(fields[i], termStr),
BooleanClause.Occur.SHOULD)); BooleanClause.Occur.SHOULD));
} }
return getBooleanQuery(clauses); return getBooleanQuery(clauses, true);
} }
return super.getPrefixQuery(field, termStr); return super.getPrefixQuery(field, termStr);
} }
@ -128,7 +128,7 @@ public class MultiFieldQueryParser extends QueryParser
clauses.add(new BooleanClause(super.getRangeQuery(fields[i], part1, part2, inclusive), clauses.add(new BooleanClause(super.getRangeQuery(fields[i], part1, part2, inclusive),
BooleanClause.Occur.SHOULD)); BooleanClause.Occur.SHOULD));
} }
return getBooleanQuery(clauses); return getBooleanQuery(clauses, true);
} }
return super.getRangeQuery(field, part1, part2, inclusive); return super.getRangeQuery(field, part1, part2, inclusive);
} }

View File

@ -393,7 +393,7 @@ public class QueryParser implements QueryParserConstants {
if (severalTokensAtSamePosition) { if (severalTokensAtSamePosition) {
if (positionCount == 1) { if (positionCount == 1) {
// no phrase query: // no phrase query:
BooleanQuery q = new BooleanQuery(); BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < v.size(); i++) { for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i); t = (org.apache.lucene.analysis.Token) v.elementAt(i);
TermQuery currentQuery = new TermQuery( TermQuery currentQuery = new TermQuery(
@ -521,9 +521,27 @@ public class QueryParser implements QueryParserConstants {
* @return Resulting {@link Query} object. * @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
*/ */
protected Query getBooleanQuery(Vector clauses) throws ParseException protected Query getBooleanQuery(Vector clauses) throws ParseException {
return getBooleanQuery(clauses, false);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses Vector that contains {@link BooleanClause} instances
* to join.
* @param disableCoord true if coord scoring should be disabled.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
throws ParseException
{ {
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) { for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i)); query.add((BooleanClause)clauses.elementAt(i));
} }

View File

@ -416,7 +416,7 @@ public class QueryParser {
if (severalTokensAtSamePosition) { if (severalTokensAtSamePosition) {
if (positionCount == 1) { if (positionCount == 1) {
// no phrase query: // no phrase query:
BooleanQuery q = new BooleanQuery(); BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < v.size(); i++) { for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i); t = (org.apache.lucene.analysis.Token) v.elementAt(i);
TermQuery currentQuery = new TermQuery( TermQuery currentQuery = new TermQuery(
@ -544,9 +544,27 @@ public class QueryParser {
* @return Resulting {@link Query} object. * @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow * @exception ParseException throw in overridden method to disallow
*/ */
protected Query getBooleanQuery(Vector clauses) throws ParseException protected Query getBooleanQuery(Vector clauses) throws ParseException {
return getBooleanQuery(clauses, false);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses Vector that contains {@link BooleanClause} instances
* to join.
* @param disableCoord true if coord scoring should be disabled.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
throws ParseException
{ {
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) { for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i)); query.add((BooleanClause)clauses.elementAt(i));
} }

View File

@ -63,10 +63,44 @@ public class BooleanQuery extends Query {
} }
private Vector clauses = new Vector(); private Vector clauses = new Vector();
private boolean disableCoord;
/** Constructs an empty boolean query. */ /** Constructs an empty boolean query. */
public BooleanQuery() {} public BooleanQuery() {}
/** Constructs an empty boolean query, optionally with coord scoring disabled.
*
* {@link Similarity#coord(int,int)} may be disabled in scoring, as
* appropriate. For example, this score factor does not make sense for most
* automatically generated queries, like {@link WildcardQuery} and {@link
* FuzzyQuery}.
*
* When coord is disabled, {@link #getSimilarity(Searcher)} returns a
* Similarity whose <code>coord</code> always yields <code>1.0f</code>.
*
* @param disableCoord <code>true</code> to disable
* {@link Similarity#coord(int,int)} in scoring; <code>false</code> to keep
* the searcher's coord factor.
*/
public BooleanQuery(boolean disableCoord) {
this.disableCoord = disableCoord;
}
/** Returns true iff {@link Similarity#coord(int,int)} is disabled in
* scoring for this query instance.
*
* @return the flag passed to {@link #BooleanQuery(boolean)}; queries built
* with the no-argument constructor report <code>false</code>.
* @see #BooleanQuery(boolean)
*/
public boolean isCoordDisabled() { return disableCoord; }
// Coord disabling lives here: when requested, the inherited Similarity is
// wrapped so that only coord() is neutralised.
// Deliberately a plain comment so the javadoc is inherited.
public Similarity getSimilarity(Searcher searcher) {
  final Similarity inherited = super.getSimilarity(searcher);
  if (!disableCoord) {
    return inherited;                             // coord left untouched
  }
  // Forward everything to the inherited Similarity except coord(), which is
  // pinned to the neutral factor 1.0f.
  return new SimilarityDelegator(inherited) {
    public float coord(int overlap, int maxOverlap) {
      return 1.0f;
    }
  };
}
/** Adds a clause to a boolean query. Clauses may be: /** Adds a clause to a boolean query. Clauses may be:
* <ul> * <ul>
* <li><code>required</code> which means that documents which <i>do not</i> * <li><code>required</code> which means that documents which <i>do not</i>

View File

@ -122,7 +122,7 @@ public final class FuzzyQuery extends MultiTermQuery {
enumerator.close(); enumerator.close();
} }
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery(true);
int size = stQueue.size(); int size = stQueue.size();
for(int i = 0; i < size; i++){ for(int i = 0; i < size; i++){
ScoreTerm st = (ScoreTerm) stQueue.pop(); ScoreTerm st = (ScoreTerm) stQueue.pop();

View File

@ -228,7 +228,7 @@ public class MultiPhraseQuery extends Query {
public Query rewrite(IndexReader reader) { public Query rewrite(IndexReader reader) {
if (termArrays.size() == 1) { // optimize one-term case if (termArrays.size() == 1) { // optimize one-term case
Term[] terms = (Term[])termArrays.get(0); Term[] terms = (Term[])termArrays.get(0);
BooleanQuery boq = new BooleanQuery(); BooleanQuery boq = new BooleanQuery(true);
for (int i=0; i<terms.length; i++) { for (int i=0; i<terms.length; i++) {
boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD); boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
} }

View File

@ -51,7 +51,7 @@ public abstract class MultiTermQuery extends Query {
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
FilteredTermEnum enumerator = getEnum(reader); FilteredTermEnum enumerator = getEnum(reader);
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery(true);
try { try {
do { do {
Term t = enumerator.term(); Term t = enumerator.term();

View File

@ -229,7 +229,7 @@ public class PhrasePrefixQuery extends Query {
protected Weight createWeight(Searcher searcher) { protected Weight createWeight(Searcher searcher) {
if (termArrays.size() == 1) { // optimize one-term case if (termArrays.size() == 1) { // optimize one-term case
Term[] terms = (Term[])termArrays.get(0); Term[] terms = (Term[])termArrays.get(0);
BooleanQuery boq = new BooleanQuery(); BooleanQuery boq = new BooleanQuery(true);
for (int i=0; i<terms.length; i++) { for (int i=0; i<terms.length; i++) {
boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD); boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
} }

View File

@ -35,7 +35,7 @@ public class PrefixQuery extends Query {
public Term getPrefix() { return prefix; } public Term getPrefix() { return prefix; }
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery(true);
TermEnum enumerator = reader.terms(prefix); TermEnum enumerator = reader.terms(prefix);
try { try {
String prefixText = prefix.text(); String prefixText = prefix.text();

View File

@ -125,7 +125,9 @@ public abstract class Query implements java.io.Serializable, Cloneable {
} }
} }
BooleanQuery result = new BooleanQuery(); boolean coordDisabled =
queries.length==0? false : ((BooleanQuery)queries[0]).isCoordDisabled();
BooleanQuery result = new BooleanQuery(coordDisabled);
Iterator i = allClauses.iterator(); Iterator i = allClauses.iterator();
while (i.hasNext()) { while (i.hasNext()) {
result.add((BooleanClause)i.next()); result.add((BooleanClause)i.next());

View File

@ -64,7 +64,7 @@ public class RangeQuery extends Query
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery(true);
TermEnum enumerator = reader.terms(lowerTerm); TermEnum enumerator = reader.terms(lowerTerm);
try { try {

View File

@ -0,0 +1,58 @@
package org.apache.lucene.search;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Expert: Delegating scoring implementation.  Useful in {@link
 * Query#getSimilarity(Searcher)} implementations, to override only certain
 * methods of a Searcher's Similarity implementation.
 *
 * <p>Every method of this class forwards its arguments unchanged to the
 * wrapped instance and returns that instance's result; subclasses override
 * just the methods whose behavior they want to replace.
 */
public class SimilarityDelegator extends Similarity {

  /** The wrapped implementation; assigned once in the constructor. */
  private final Similarity delegee;

  /** Construct a {@link Similarity} that delegates all methods to another.
   *
   * @param delegee the Similarity implementation to delegate to
   */
  public SimilarityDelegator(Similarity delegee) {
    this.delegee = delegee;
  }

  // The methods below carry no javadoc of their own so that the
  // documentation declared on Similarity is inherited.

  public float lengthNorm(String fieldName, int numTerms) {
    return delegee.lengthNorm(fieldName, numTerms);
  }

  public float queryNorm(float sumOfSquaredWeights) {
    return delegee.queryNorm(sumOfSquaredWeights);
  }

  public float tf(float freq) {
    return delegee.tf(freq);
  }

  public float sloppyFreq(int distance) {
    return delegee.sloppyFreq(distance);
  }

  public float idf(int docFreq, int numDocs) {
    return delegee.idf(docFreq, numDocs);
  }

  public float coord(int overlap, int maxOverlap) {
    return delegee.coord(overlap, maxOverlap);
  }
}