From 0cde90fcb10c93c56d5f7d6c9228002ffdff1fec Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Wed, 26 Oct 2011 01:58:12 +0200
Subject: [PATCH] minimum_number_should_match in a query_string, closes #1420.

---
Review notes (this section is ignored when the patch is applied):
 * QueryStringQueryBuilder serialized the option as "minimum_should_write", a key that
   QueryStringQueryParser does not read; it now writes "minimum_should_match", the same
   key FieldQueryBuilder writes.
 * The regex patterns in Queries are now final and the two new Queries methods carry Javadoc.
 * The blob ids on the "index" lines and the commit id above predate these revisions.

 .../queryParser/QueryParserSettings.java      | 11 +++
 .../common/lucene/search/Queries.java         | 73 +++++++++++++++++++
 .../index/query/BoolQueryParser.java          |  2 +
 .../index/query/FieldQueryBuilder.java        | 10 +++
 .../index/query/FieldQueryParser.java         |  7 ++
 .../index/query/QueryStringQueryBuilder.java  | 10 +++
 .../index/query/QueryStringQueryParser.java   |  7 ++
 7 files changed, 120 insertions(+)

diff --git a/modules/elasticsearch/src/main/java/org/apache/lucene/queryParser/QueryParserSettings.java b/modules/elasticsearch/src/main/java/org/apache/lucene/queryParser/QueryParserSettings.java
index 0188726f234..89628255341 100644
--- a/modules/elasticsearch/src/main/java/org/apache/lucene/queryParser/QueryParserSettings.java
+++ b/modules/elasticsearch/src/main/java/org/apache/lucene/queryParser/QueryParserSettings.java
@@ -43,6 +43,7 @@ public class QueryParserSettings {
     private boolean escape = false;
     private Analyzer analyzer = null;
     private MultiTermQuery.RewriteMethod rewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+    private String minimumShouldMatch;
 
     public String queryString() {
         return queryString;
@@ -164,6 +165,14 @@ public class QueryParserSettings {
         this.rewriteMethod = rewriteMethod;
     }
 
+    public String minimumShouldMatch() {
+        return this.minimumShouldMatch;
+    }
+
+    public void minimumShouldMatch(String minimumShouldMatch) {
+        this.minimumShouldMatch = minimumShouldMatch;
+    }
+
     @Override public boolean equals(Object o) {
         if (this == o) return true;
         if (o == null || getClass() != o.getClass()) return false;
@@ -186,6 +195,8 @@ public class QueryParserSettings {
         if (queryString != null ? !queryString.equals(that.queryString) : that.queryString != null) return false;
         if (rewriteMethod != null ? !rewriteMethod.equals(that.rewriteMethod) : that.rewriteMethod != null)
             return false;
+        if (minimumShouldMatch != null ? !minimumShouldMatch.equals(that.minimumShouldMatch) : that.minimumShouldMatch != null)
+            return false;
 
         return true;
     }
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/Queries.java b/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/Queries.java
index cf79f909a2e..b5166892c59 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/Queries.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/Queries.java
@@ -26,9 +26,11 @@ import org.apache.lucene.search.DisjunctionMaxQuery;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
+import org.elasticsearch.common.Nullable;
 
 import java.lang.reflect.Field;
 import java.util.List;
+import java.util.regex.Pattern;
 
 /**
  * @author kimchy (shay.banon)
@@ -123,4 +125,75 @@ public class Queries {
         }
         return false;
     }
+
+    /**
+     * Applies a "minimum should match" spec to the given boolean query. The spec is resolved
+     * against the number of SHOULD clauses of the query and, when the resolved value is positive,
+     * set as the query's minimum number of should-match clauses. A null spec leaves the query as is.
+     */
+    public static void applyMinimumShouldMatch(BooleanQuery query, @Nullable String minimumShouldMatch) {
+        if (minimumShouldMatch == null) {
+            return;
+        }
+        int optionalClauses = 0;
+        for (BooleanClause c : query.clauses()) {
+            if (c.getOccur() == BooleanClause.Occur.SHOULD) {
+                optionalClauses++;
+            }
+        }
+
+        int msm = calculateMinShouldMatch(optionalClauses, minimumShouldMatch);
+        if (0 < msm) {
+            query.setMinimumNumberShouldMatch(msm);
+        }
+    }
+
+    private static final Pattern spaceAroundLessThanPattern = Pattern.compile("(\\s+<\\s*)|(\\s*<\\s+)");
+    private static final Pattern spacePattern = Pattern.compile(" ");
+    private static final Pattern lessThanPattern = Pattern.compile("<");
+
+    /**
+     * Resolves a "minimum should match" spec against the number of optional clauses. The spec is
+     * either a plain integer ("2"), a percentage ("75%"), or one or more space separated
+     * conditionals ("3<90%") whose right hand side only applies when there are more optional
+     * clauses than the number on the left. Negative values are counted back from the number of
+     * optional clauses. The result is never negative and never above the number of optional clauses.
+     */
+    static int calculateMinShouldMatch(int optionalClauseCount, String spec) {
+        int result = optionalClauseCount;
+        spec = spec.trim();
+
+        if (-1 < spec.indexOf("<")) {
+            /* we have conditional spec(s) */
+            spec = spaceAroundLessThanPattern.matcher(spec).replaceAll("<");
+            for (String s : spacePattern.split(spec)) {
+                String[] parts = lessThanPattern.split(s, 0);
+                int upperBound = Integer.parseInt(parts[0]);
+                if (optionalClauseCount <= upperBound) {
+                    return result;
+                } else {
+                    result = calculateMinShouldMatch
+                            (optionalClauseCount, parts[1]);
+                }
+            }
+            return result;
+        }
+
+        /* otherwise, simple expression */
+
+        if (-1 < spec.indexOf('%')) {
+            /* percentage - assume the % was the last char. If not, let Integer.parseInt fail. */
+            spec = spec.substring(0, spec.length() - 1);
+            int percent = Integer.parseInt(spec);
+            float calc = (result * percent) * (1 / 100f);
+            result = calc < 0 ? result + (int) calc : (int) calc;
+        } else {
+            int calc = Integer.parseInt(spec);
+            result = calc < 0 ? result + calc : calc;
+        }
+
+        return (optionalClauseCount < result ?
+                optionalClauseCount : (result < 0 ? 0 : result));
+
+    }
 }
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/BoolQueryParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/BoolQueryParser.java
index 53ad8a502f4..47881b22952 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/BoolQueryParser.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/BoolQueryParser.java
@@ -88,6 +88,8 @@ public class BoolQueryParser implements QueryParser {
                     disableCoord = parser.booleanValue();
                 } else if ("minimum_number_should_match".equals(currentFieldName) || "minimumNumberShouldMatch".equals(currentFieldName)) {
                     minimumNumberShouldMatch = parser.intValue();
+                } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
+                    minimumNumberShouldMatch = parser.intValue();
                 } else if ("boost".equals(currentFieldName)) {
                     boost = parser.floatValue();
                 }
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/FieldQueryBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/FieldQueryBuilder.java
index a4b798a26fc..44e5b92ab24 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/FieldQueryBuilder.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/FieldQueryBuilder.java
@@ -67,6 +67,8 @@ public class FieldQueryBuilder extends BaseQueryBuilder {
 
     private String rewrite;
 
+    private String minimumShouldMatch;
+
     /**
      * A query that executes the query string against a field. It is a simplified
      * version of {@link QueryStringQueryBuilder} that simply runs against
@@ -276,6 +278,11 @@ public class FieldQueryBuilder extends BaseQueryBuilder {
         return this;
     }
 
+    public FieldQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
+        this.minimumShouldMatch = minimumShouldMatch;
+        return this;
+    }
+
     @Override public void doXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject(FieldQueryParser.NAME);
         if (!extraSet) {
@@ -319,6 +326,9 @@ public class FieldQueryBuilder extends BaseQueryBuilder {
             if (rewrite != null) {
                 builder.field("rewrite", rewrite);
             }
+            if (minimumShouldMatch != null) {
+                builder.field("minimum_should_match", minimumShouldMatch);
+            }
             builder.endObject();
         }
         builder.endObject();
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/FieldQueryParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/FieldQueryParser.java
index 4a9fc7d1dd0..670c525672c 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/FieldQueryParser.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/FieldQueryParser.java
@@ -22,8 +22,10 @@ package org.elasticsearch.index.query;
 import org.apache.lucene.queryParser.MapperQueryParser;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParserSettings;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.index.query.support.QueryParsers;
 
@@ -96,6 +98,8 @@ public class FieldQueryParser implements QueryParser {
                         qpSettings.analyzeWildcard(parser.booleanValue());
                     } else if ("rewrite".equals(currentFieldName)) {
                         qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull()));
+                    } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
+                        qpSettings.minimumShouldMatch(parser.textOrNull());
                     }
                 }
             }
@@ -129,6 +133,9 @@ public class FieldQueryParser implements QueryParser {
             query = queryParser.parse(qpSettings.queryString());
             query.setBoost(qpSettings.boost());
             query = optimizeQuery(fixNegativeQueryIfNeeded(query));
+            if (query instanceof BooleanQuery) {
+                Queries.applyMinimumShouldMatch((BooleanQuery) query, qpSettings.minimumShouldMatch());
+            }
             parseContext.indexCache().queryParserCache().put(qpSettings, query);
             return query;
         } catch (ParseException e) {
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java
index a3fea03f644..8920becff3b 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java
@@ -80,6 +80,8 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder {
 
     private String rewrite = null;
 
+    private String minimumShouldMatch;
+
     public QueryStringQueryBuilder(String queryString) {
         this.queryString = queryString;
     }
@@ -242,6 +244,11 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder {
         return this;
     }
 
+    public QueryStringQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
+        this.minimumShouldMatch = minimumShouldMatch;
+        return this;
+    }
+
     /**
      * Sets the boost for this query. Documents matching this query will (in addition to the normal
      * weightings) have their score multiplied by the boost provided.
@@ -313,6 +320,9 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder {
         if (rewrite != null) {
             builder.field("rewrite", rewrite);
         }
+        if (minimumShouldMatch != null) {
+            builder.field("minimum_should_match", minimumShouldMatch);
+        }
         builder.endObject();
     }
 }
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java
index 18a13b52a5f..bd03a19431d 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java
@@ -22,10 +22,12 @@ package org.elasticsearch.index.query;
 import org.apache.lucene.queryParser.MapperQueryParser;
 import org.apache.lucene.queryParser.MultiFieldQueryParserSettings;
 import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.collect.Lists;
 import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.trove.impl.Constants;
 import org.elasticsearch.common.trove.map.hash.TObjectFloatHashMap;
@@ -147,6 +149,8 @@ public class QueryStringQueryParser implements QueryParser {
                     qpSettings.analyzeWildcard(parser.booleanValue());
                 } else if ("rewrite".equals(currentFieldName)) {
                     qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull()));
+                } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
+                    qpSettings.minimumShouldMatch(parser.textOrNull());
                 }
             }
         }
@@ -184,6 +188,9 @@ public class QueryStringQueryParser implements QueryParser {
             query = queryParser.parse(qpSettings.queryString());
             query.setBoost(qpSettings.boost());
             query = optimizeQuery(fixNegativeQueryIfNeeded(query));
+            if (query instanceof BooleanQuery) {
+                Queries.applyMinimumShouldMatch((BooleanQuery) query, qpSettings.minimumShouldMatch());
+            }
             parseContext.indexCache().queryParserCache().put(qpSettings, query);
             return query;
         } catch (ParseException e) {