mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-09 14:34:43 +00:00
Expose Lucene's new TopTermsBlendedFreqScoringRewrite.
This rewrite method is useful because it computes scores as if all matching terms had the same frequency. This avoids disappointing rankings in which a fuzzy query ranks typos first simply because they are less frequent than the correctly spelled term.
This commit is contained in:
parent
9519100eca
commit
8238f497d8
@ -95,7 +95,7 @@ public class FuzzyQueryParser implements QueryParser {
|
||||
} else if ("transpositions".equals(currentFieldName)) {
|
||||
transpositions = parser.booleanValue();
|
||||
} else if ("rewrite".equals(currentFieldName)) {
|
||||
rewriteMethod = QueryParsers.parseRewriteMethod(parser.textOrNull(), null);
|
||||
rewriteMethod = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null);
|
||||
} else if ("_name".equals(currentFieldName)) {
|
||||
queryName = parser.text();
|
||||
} else {
|
||||
|
@ -124,9 +124,9 @@ public class MatchQueryParser implements QueryParser {
|
||||
} else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
|
||||
minimumShouldMatch = parser.textOrNull();
|
||||
} else if ("rewrite".equals(currentFieldName)) {
|
||||
matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
|
||||
matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
|
||||
} else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
|
||||
matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
|
||||
matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
|
||||
} else if ("fuzzy_transpositions".equals(currentFieldName)) {
|
||||
matchQuery.setTranspositions(parser.booleanValue());
|
||||
} else if ("lenient".equals(currentFieldName)) {
|
||||
|
@ -114,9 +114,9 @@ public class MultiMatchQueryParser implements QueryParser {
|
||||
} else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
|
||||
minimumShouldMatch = parser.textOrNull();
|
||||
} else if ("rewrite".equals(currentFieldName)) {
|
||||
multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
|
||||
multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
|
||||
} else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
|
||||
multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
|
||||
multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
|
||||
} else if ("use_dis_max".equals(currentFieldName) || "useDisMax".equals(currentFieldName)) {
|
||||
useDisMax = parser.booleanValue();
|
||||
} else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
|
||||
|
@ -97,7 +97,7 @@ public class PrefixQueryParser implements QueryParser {
|
||||
throw new QueryParsingException(parseContext, "No value specified for prefix query");
|
||||
}
|
||||
|
||||
MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null);
|
||||
MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod, null);
|
||||
|
||||
Query query = null;
|
||||
MappedFieldType fieldType = parseContext.fieldMapper(fieldName);
|
||||
|
@ -175,7 +175,7 @@ public class QueryStringQueryParser implements QueryParser {
|
||||
} else if ("fuzzy_max_expansions".equals(currentFieldName) || "fuzzyMaxExpansions".equals(currentFieldName)) {
|
||||
qpSettings.fuzzyMaxExpansions(parser.intValue());
|
||||
} else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
|
||||
qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull()));
|
||||
qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull()));
|
||||
} else if ("phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) {
|
||||
qpSettings.phraseSlop(parser.intValue());
|
||||
} else if (parseContext.parseFieldMatcher().match(currentFieldName, FUZZINESS)) {
|
||||
@ -187,7 +187,7 @@ public class QueryStringQueryParser implements QueryParser {
|
||||
} else if ("analyze_wildcard".equals(currentFieldName) || "analyzeWildcard".equals(currentFieldName)) {
|
||||
qpSettings.analyzeWildcard(parser.booleanValue());
|
||||
} else if ("rewrite".equals(currentFieldName)) {
|
||||
qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull()));
|
||||
qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull()));
|
||||
} else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
|
||||
qpSettings.minimumShouldMatch(parser.textOrNull());
|
||||
} else if ("quote_field_suffix".equals(currentFieldName) || "quoteFieldSuffix".equals(currentFieldName)) {
|
||||
|
@ -109,7 +109,7 @@ public class RegexpQueryParser implements QueryParser {
|
||||
throw new QueryParsingException(parseContext, "No value specified for regexp query");
|
||||
}
|
||||
|
||||
MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null);
|
||||
MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod, null);
|
||||
|
||||
Query query = null;
|
||||
MappedFieldType fieldType = parseContext.fieldMapper(fieldName);
|
||||
|
@ -25,7 +25,6 @@ import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.query.support.QueryParsers;
|
||||
|
||||
@ -103,8 +102,8 @@ public class WildcardQueryParser implements QueryParser {
|
||||
}
|
||||
|
||||
WildcardQuery wildcardQuery = new WildcardQuery(new Term(fieldName, valueBytes));
|
||||
QueryParsers.setRewriteMethod(wildcardQuery, rewriteMethod);
|
||||
wildcardQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(rewriteMethod));
|
||||
QueryParsers.setRewriteMethod(wildcardQuery, parseContext.parseFieldMatcher(), rewriteMethod);
|
||||
wildcardQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod));
|
||||
wildcardQuery.setBoost(boost);
|
||||
if (queryName != null) {
|
||||
parseContext.addNamedQuery(queryName, wildcardQuery);
|
||||
|
@ -20,14 +20,22 @@
|
||||
package org.elasticsearch.index.query.support;
|
||||
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParseFieldMatcher;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public final class QueryParsers {
|
||||
|
||||
private static final ParseField CONSTANT_SCORE = new ParseField("constant_score", "constant_score_auto", "constant_score_filter");
|
||||
private static final ParseField SCORING_BOOLEAN = new ParseField("scoring_boolean");
|
||||
private static final ParseField CONSTANT_SCORE_BOOLEAN = new ParseField("constant_score_boolean");
|
||||
private static final ParseField TOP_TERMS = new ParseField("top_terms_");
|
||||
private static final ParseField TOP_TERMS_BOOST = new ParseField("top_terms_boost_");
|
||||
private static final ParseField TOP_TERMS_BLENDED_FREQS = new ParseField("top_terms_blended_freqs_");
|
||||
|
||||
private QueryParsers() {
|
||||
|
||||
}
|
||||
@ -39,50 +47,55 @@ public final class QueryParsers {
|
||||
query.setRewriteMethod(rewriteMethod);
|
||||
}
|
||||
|
||||
public static void setRewriteMethod(MultiTermQuery query, @Nullable String rewriteMethod) {
|
||||
public static void setRewriteMethod(MultiTermQuery query, ParseFieldMatcher matcher, @Nullable String rewriteMethod) {
|
||||
if (rewriteMethod == null) {
|
||||
return;
|
||||
}
|
||||
query.setRewriteMethod(parseRewriteMethod(rewriteMethod));
|
||||
query.setRewriteMethod(parseRewriteMethod(matcher, rewriteMethod));
|
||||
}
|
||||
|
||||
public static MultiTermQuery.RewriteMethod parseRewriteMethod(@Nullable String rewriteMethod) {
|
||||
return parseRewriteMethod(rewriteMethod, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
|
||||
public static MultiTermQuery.RewriteMethod parseRewriteMethod(ParseFieldMatcher matcher, @Nullable String rewriteMethod) {
|
||||
return parseRewriteMethod(matcher, rewriteMethod, MultiTermQuery.CONSTANT_SCORE_REWRITE);
|
||||
}
|
||||
|
||||
public static MultiTermQuery.RewriteMethod parseRewriteMethod(@Nullable String rewriteMethod, @Nullable MultiTermQuery.RewriteMethod defaultRewriteMethod) {
|
||||
public static MultiTermQuery.RewriteMethod parseRewriteMethod(ParseFieldMatcher matcher, @Nullable String rewriteMethod, @Nullable MultiTermQuery.RewriteMethod defaultRewriteMethod) {
|
||||
if (rewriteMethod == null) {
|
||||
return defaultRewriteMethod;
|
||||
}
|
||||
if ("constant_score_auto".equals(rewriteMethod) || "constant_score_auto".equals(rewriteMethod)) {
|
||||
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
|
||||
if (matcher.match(rewriteMethod, CONSTANT_SCORE)) {
|
||||
return MultiTermQuery.CONSTANT_SCORE_REWRITE;
|
||||
}
|
||||
if ("scoring_boolean".equals(rewriteMethod) || "scoringBoolean".equals(rewriteMethod)) {
|
||||
return MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
|
||||
if (matcher.match(rewriteMethod, SCORING_BOOLEAN)) {
|
||||
return MultiTermQuery.SCORING_BOOLEAN_REWRITE;
|
||||
}
|
||||
if ("constant_score_boolean".equals(rewriteMethod) || "constantScoreBoolean".equals(rewriteMethod)) {
|
||||
return MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
|
||||
if (matcher.match(rewriteMethod, CONSTANT_SCORE_BOOLEAN)) {
|
||||
return MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE;
|
||||
}
|
||||
if ("constant_score_filter".equals(rewriteMethod) || "constantScoreFilter".equals(rewriteMethod)) {
|
||||
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
|
||||
|
||||
int firstDigit = -1;
|
||||
for (int i = 0; i < rewriteMethod.length(); ++i) {
|
||||
if (Character.isDigit(rewriteMethod.charAt(i))) {
|
||||
firstDigit = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rewriteMethod.startsWith("top_terms_boost_")) {
|
||||
int size = Integer.parseInt(rewriteMethod.substring("top_terms_boost_".length()));
|
||||
return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
|
||||
}
|
||||
if (rewriteMethod.startsWith("topTermsBoost")) {
|
||||
int size = Integer.parseInt(rewriteMethod.substring("topTermsBoost".length()));
|
||||
return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
|
||||
}
|
||||
if (rewriteMethod.startsWith("top_terms_")) {
|
||||
int size = Integer.parseInt(rewriteMethod.substring("top_terms_".length()));
|
||||
return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);
|
||||
}
|
||||
if (rewriteMethod.startsWith("topTerms")) {
|
||||
int size = Integer.parseInt(rewriteMethod.substring("topTerms".length()));
|
||||
return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);
|
||||
|
||||
if (firstDigit >= 0) {
|
||||
final int size = Integer.parseInt(rewriteMethod.substring(firstDigit));
|
||||
String rewriteMethodName = rewriteMethod.substring(0, firstDigit);
|
||||
|
||||
if (matcher.match(rewriteMethodName, TOP_TERMS)) {
|
||||
return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);
|
||||
}
|
||||
if (matcher.match(rewriteMethodName, TOP_TERMS_BOOST)) {
|
||||
return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
|
||||
}
|
||||
if (matcher.match(rewriteMethodName, TOP_TERMS_BLENDED_FREQS)) {
|
||||
return new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(size);
|
||||
}
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("Failed to parse rewrite_method [" + rewriteMethod + "]");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.memory.MemoryIndex;
|
||||
@ -29,6 +30,7 @@ import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||
import org.apache.lucene.queries.TermsQuery;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
|
||||
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
|
||||
import org.apache.lucene.search.spans.*;
|
||||
import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter;
|
||||
@ -68,6 +70,7 @@ import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
||||
@ -428,6 +431,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
|
||||
assertThat(parsedQuery, instanceOf(FuzzyQuery.class));
|
||||
FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery;
|
||||
assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh")));
|
||||
assertThat(fuzzyQuery.getRewriteMethod(), instanceOf(MultiTermQuery.TopTermsBlendedFreqScoringRewrite.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -2423,4 +2427,16 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
|
||||
q = csq.getQuery();
|
||||
assertThat(q, instanceOf(TermsQuery.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBlendedRewriteMethod() throws IOException {
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
for (String rewrite : Arrays.asList("top_terms_blended_freqs_10", "topTermsBlendedFreqs10")) {
|
||||
Query parsedQuery = queryParser.parse(prefixQuery("field", "val").rewrite(rewrite)).query();
|
||||
assertThat(parsedQuery, instanceOf(PrefixQuery.class));
|
||||
PrefixQuery prefixQuery = (PrefixQuery) parsedQuery;
|
||||
assertThat(prefixQuery.getPrefix(), equalTo(new Term("field", "val")));
|
||||
assertThat(prefixQuery.getRewriteMethod(), instanceOf(MultiTermQuery.TopTermsBlendedFreqScoringRewrite.class));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -10,9 +10,11 @@ also happens on the
|
||||
All of those queries allow you to control how they will get rewritten using
|
||||
the `rewrite` parameter:
|
||||
|
||||
* When not set, or set to `constant_score_auto`, defaults to
|
||||
automatically choosing either `constant_score_boolean` or
|
||||
`constant_score_filter` based on query characteristics.
|
||||
* `constant_score` (default): A rewrite method that performs like
|
||||
`constant_score_boolean` when there are few matching terms and otherwise
|
||||
visits all matching terms in sequence and marks documents for that term.
|
||||
Matching documents are assigned a constant score equal to the query's
|
||||
boost.
|
||||
* `scoring_boolean`: A rewrite method that first translates each term
|
||||
into a should clause in a boolean query, and keeps the scores as
|
||||
computed by the query. Note that typically such scores are meaningless
|
||||
@ -25,10 +27,6 @@ are not computed. Instead, each matching document receives a constant
|
||||
score equal to the query's boost. This rewrite method will hit too many
|
||||
clauses failure if it exceeds the boolean query limit (defaults to
|
||||
`1024`).
|
||||
* `constant_score_filter`: A rewrite method that first creates a private
|
||||
Filter by visiting each term in sequence and marking all docs for that
|
||||
term. Matching documents are assigned a constant score equal to the
|
||||
query's boost.
|
||||
* `top_terms_N`: A rewrite method that first translates each term into
|
||||
a should clause in a boolean query, and keeps the scores as computed by the
|
||||
query. This rewrite method only uses the top scoring terms so it will
|
||||
@ -39,4 +37,9 @@ into should clause in boolean query, but the scores are only computed as
|
||||
the boost. This rewrite method only uses the top scoring terms so it
|
||||
will not overflow the boolean max clause count. The `N` controls the
|
||||
size of the top scoring terms to use.
|
||||
|
||||
* `top_terms_blended_freqs_N`: A rewrite method that first translates each
|
||||
term into a should clause in a boolean query, but all term queries compute scores
|
||||
as if they had the same frequency. In practice the frequency which is used
|
||||
is the maximum frequency of all matching terms. This rewrite method only uses
|
||||
the top scoring terms so it will not overflow the boolean max clause count. The
|
||||
`N` controls the size of the top scoring terms to use.
|
||||
|
Loading…
x
Reference in New Issue
Block a user