percolator: add support for term extraction for MultiPhraseQuery

This commit is contained in:
Martijn van Groningen 2017-02-14 23:14:49 +01:00
parent 9105672969
commit 81d53470e7
2 changed files with 34 additions and 0 deletions

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
@ -44,6 +45,7 @@ import org.elasticsearch.common.logging.LoggerMessageFormat;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@ -66,6 +68,7 @@ public final class QueryAnalyzer {
map.put(CommonTermsQuery.class, commonTermsQuery());
map.put(BlendedTermQuery.class, blendedTermQuery());
map.put(PhraseQuery.class, phraseQuery());
map.put(MultiPhraseQuery.class, multiPhraseQuery());
map.put(SpanTermQuery.class, spanTermQuery());
map.put(SpanNearQuery.class, spanNearQuery());
map.put(SpanOrQuery.class, spanOrQuery());
@ -197,6 +200,21 @@ public final class QueryAnalyzer {
};
}
/**
 * Creates the extraction function registered for {@link MultiPhraseQuery}.
 *
 * A multi phrase query holds one array of alternative terms per phrase position.
 * Each of those arrays is offered, in order, to
 * {@code selectTermListWithTheLongestShortestTerm} together with the best
 * candidate seen so far, and only the set that helper ends up preferring is
 * returned.
 *
 * @return a function that casts its argument to {@link MultiPhraseQuery} and
 *         produces a {@code Result}: built with {@code true} and an empty set
 *         when the query has no term arrays, otherwise built with
 *         {@code false} and the selected term set
 */
static Function<Query, Result> multiPhraseQuery() {
    return query -> {
        final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) query;
        final Term[][] termsPerPosition = multiPhrase.getTermArrays();

        // Nothing to extract from a query without any term arrays.
        if (termsPerPosition.length == 0) {
            return new Result(true, Collections.emptySet());
        }

        // Start from null so the first array is compared against "no candidate yet";
        // the helper decides which of the two sets to keep on every step.
        Set<Term> selected = null;
        for (int position = 0; position < termsPerPosition.length; position++) {
            Set<Term> candidates = new HashSet<>(Arrays.asList(termsPerPosition[position]));
            selected = selectTermListWithTheLongestShortestTerm(selected, candidates);
        }

        // Only the terms of a single position are kept, so the result is
        // constructed with false (NOTE(review): presumably the "verified" flag - confirm).
        return new Result(false, selected);
    };
}
static Function<Query, Result> spanTermQuery() {
return query -> {
Term term = ((SpanTermQuery) query).getTerm();

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermInSetQuery;
@ -93,6 +94,21 @@ public class QueryAnalyzerTests extends ESTestCase {
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
}
/**
 * Checks term extraction for a {@link MultiPhraseQuery} with four positions:
 * the analysis result must be unverified and must contain exactly one term,
 * {@code _field:_very_long_term}.
 */
public void testExtractQueryMetadata_multiPhraseQuery() {
    MultiPhraseQuery.Builder builder = new MultiPhraseQuery.Builder();
    // Position 0: a single term.
    builder.add(new Term("_field", "_long_term"));
    // Positions 1 and 2: two alternatives each.
    builder.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_term")});
    builder.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_very_long_term")});
    // Position 3: only the longest term; this is the array expected to be selected.
    builder.add(new Term[] {new Term("_field", "_very_long_term")});
    MultiPhraseQuery query = builder.build();

    Result analysis = analyze(query);
    assertThat(analysis.verified, is(false));

    List<Term> extracted = new ArrayList<>(analysis.terms);
    assertThat(extracted.size(), equalTo(1));

    Term onlyTerm = extracted.get(0);
    assertThat(onlyTerm.field(), equalTo("_field"));
    assertThat(onlyTerm.bytes().utf8ToString(), equalTo("_very_long_term"));
}
public void testExtractQueryMetadata_booleanQuery() {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));