From 4d1970ebab010caf73ae19d5ac46a2d0ae39dd3a Mon Sep 17 00:00:00 2001 From: Doug Cutting Date: Mon, 7 Mar 2005 19:26:27 +0000 Subject: [PATCH] Patch #33472. Disable coord() in automatically generated queries. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@156438 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 11 +++- .../queryParser/MultiFieldQueryParser.java | 8 +-- .../lucene/queryParser/QueryParser.java | 24 +++++++- .../apache/lucene/queryParser/QueryParser.jj | 24 +++++++- .../apache/lucene/search/BooleanQuery.java | 34 +++++++++++ .../org/apache/lucene/search/FuzzyQuery.java | 2 +- .../lucene/search/MultiPhraseQuery.java | 2 +- .../apache/lucene/search/MultiTermQuery.java | 2 +- .../lucene/search/PhrasePrefixQuery.java | 2 +- .../org/apache/lucene/search/PrefixQuery.java | 2 +- src/java/org/apache/lucene/search/Query.java | 4 +- .../org/apache/lucene/search/RangeQuery.java | 2 +- .../lucene/search/SimilarityDelegator.java | 58 +++++++++++++++++++ 13 files changed, 157 insertions(+), 18 deletions(-) create mode 100644 src/java/org/apache/lucene/search/SimilarityDelegator.java diff --git a/CHANGES.txt b/CHANGES.txt index 8456f7b9b6d..b87a481cf01 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -12,7 +12,7 @@ Changes in runtime behavior go into the rewritten query and thus the exception is avoided. (Christoph) - 2. Changed system property from "org.apache.lucene.lockdir" to + 2. Changed system property from "org.apache.lucene.lockdir" to "org.apache.lucene.lockDir", so that its casing follows the existing pattern used in other Lucene system properties. (Bernhard) @@ -116,6 +116,15 @@ Bug fixes 5. Small bug in skipTo of ConjunctionScorer that caused NullPointerException if skipTo() was called without prior call to next() fixed. (Christoph) + 6. Disable Similarity.coord() in the scoring of most automatically + generated boolean queries. 
The coord() score factor is + appropriate when clauses are independently specified by a user, + but is usually not appropriate when clauses are generated + automatically, e.g., by a fuzzy, wildcard or range query. Matches + on such automatically generated queries are no longer penalized + for not matching all terms. (Doug Cutting, Patch #33472) + + Optimizations 1. Disk usage (peak requirements during indexing and optimization) diff --git a/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java b/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java index 5906092ee22..a32d6bcc1c5 100644 --- a/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java +++ b/src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java @@ -63,7 +63,7 @@ public class MultiFieldQueryParser extends QueryParser for (int i = 0; i < fields.length; i++) clauses.add(new BooleanClause(super.getFieldQuery(fields[i], queryText), BooleanClause.Occur.SHOULD)); - return getBooleanQuery(clauses); + return getBooleanQuery(clauses, true); } return super.getFieldQuery(field, queryText); } @@ -95,7 +95,7 @@ public class MultiFieldQueryParser extends QueryParser clauses.add(new BooleanClause(super.getFuzzyQuery(fields[i], termStr, minSimilarity), BooleanClause.Occur.SHOULD)); } - return getBooleanQuery(clauses); + return getBooleanQuery(clauses, true); } return super.getFuzzyQuery(field, termStr, minSimilarity); } @@ -108,7 +108,7 @@ public class MultiFieldQueryParser extends QueryParser clauses.add(new BooleanClause(super.getPrefixQuery(fields[i], termStr), BooleanClause.Occur.SHOULD)); } - return getBooleanQuery(clauses); + return getBooleanQuery(clauses, true); } return super.getPrefixQuery(field, termStr); } @@ -128,7 +128,7 @@ public class MultiFieldQueryParser extends QueryParser clauses.add(new BooleanClause(super.getRangeQuery(fields[i], part1, part2, inclusive), BooleanClause.Occur.SHOULD)); } - return getBooleanQuery(clauses); + return getBooleanQuery(clauses, 
true); } return super.getRangeQuery(field, part1, part2, inclusive); } diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.java b/src/java/org/apache/lucene/queryParser/QueryParser.java index 15c40a34d9e..8a0f604954d 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.java +++ b/src/java/org/apache/lucene/queryParser/QueryParser.java @@ -393,7 +393,7 @@ public class QueryParser implements QueryParserConstants { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: - BooleanQuery q = new BooleanQuery(); + BooleanQuery q = new BooleanQuery(true); for (int i = 0; i < v.size(); i++) { t = (org.apache.lucene.analysis.Token) v.elementAt(i); TermQuery currentQuery = new TermQuery( @@ -521,9 +521,27 @@ public class QueryParser implements QueryParserConstants { * @return Resulting {@link Query} object. * @exception ParseException throw in overridden method to disallow */ - protected Query getBooleanQuery(Vector clauses) throws ParseException + protected Query getBooleanQuery(Vector clauses) throws ParseException { + return getBooleanQuery(clauses, false); + } + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains {@link BooleanClause} instances + * to join. + * @param disableCoord true if coord scoring should be disabled. + * + * @return Resulting {@link Query} object. 
+ * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(Vector clauses, boolean disableCoord) + throws ParseException { - BooleanQuery query = new BooleanQuery(); + BooleanQuery query = new BooleanQuery(disableCoord); for (int i = 0; i < clauses.size(); i++) { query.add((BooleanClause)clauses.elementAt(i)); } diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.jj b/src/java/org/apache/lucene/queryParser/QueryParser.jj index da017fb94b3..3149ae5f8e9 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.jj +++ b/src/java/org/apache/lucene/queryParser/QueryParser.jj @@ -416,7 +416,7 @@ public class QueryParser { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: - BooleanQuery q = new BooleanQuery(); + BooleanQuery q = new BooleanQuery(true); for (int i = 0; i < v.size(); i++) { t = (org.apache.lucene.analysis.Token) v.elementAt(i); TermQuery currentQuery = new TermQuery( @@ -544,9 +544,27 @@ public class QueryParser { * @return Resulting {@link Query} object. * @exception ParseException throw in overridden method to disallow */ - protected Query getBooleanQuery(Vector clauses) throws ParseException + protected Query getBooleanQuery(Vector clauses) throws ParseException { + return getBooleanQuery(clauses, false); + } + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains {@link BooleanClause} instances + * to join. + * @param disableCoord true if coord scoring should be disabled. + * + * @return Resulting {@link Query} object. 
+ * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(Vector clauses, boolean disableCoord) + throws ParseException { - BooleanQuery query = new BooleanQuery(); + BooleanQuery query = new BooleanQuery(disableCoord); for (int i = 0; i < clauses.size(); i++) { query.add((BooleanClause)clauses.elementAt(i)); } diff --git a/src/java/org/apache/lucene/search/BooleanQuery.java b/src/java/org/apache/lucene/search/BooleanQuery.java index fb531ec5595..9667a7bc98f 100644 --- a/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/src/java/org/apache/lucene/search/BooleanQuery.java @@ -63,10 +63,44 @@ public class BooleanQuery extends Query { } private Vector clauses = new Vector(); + private boolean disableCoord; /** Constructs an empty boolean query. */ public BooleanQuery() {} + /** Constructs an empty boolean query. + * + * {@link Similarity#coord(int,int)} may be disabled in scoring, as + * appropriate. For example, this score factor does not make sense for most + * automatically generated queries, like {@link WildcardQuery} and {@link + * FuzzyQuery}. + * + * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring. + */ + public BooleanQuery(boolean disableCoord) { + this.disableCoord = disableCoord; + } + + /** Returns true iff {@link Similarity#coord(int,int)} is disabled in + * scoring for this query instance. + * @see #BooleanQuery(boolean) + */ + public boolean isCoordDisabled() { return disableCoord; } + + // Implement coord disabling. + // Inherit javadoc. + public Similarity getSimilarity(Searcher searcher) { + Similarity result = super.getSimilarity(searcher); + if (disableCoord) { // disable coord as requested + result = new SimilarityDelegator(result) { + public float coord(int overlap, int maxOverlap) { + return 1.0f; + } + }; + } + return result; + } + /** Adds a clause to a boolean query. Clauses may be: *