percolator: add support for term extraction for MultiPhraseQuery
This commit is contained in:
parent
9105672969
commit
81d53470e7
|
@ -28,6 +28,7 @@ import org.apache.lucene.search.BoostQuery;
|
|||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SynonymQuery;
|
||||
|
@ -44,6 +45,7 @@ import org.elasticsearch.common.logging.LoggerMessageFormat;
|
|||
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -66,6 +68,7 @@ public final class QueryAnalyzer {
|
|||
map.put(CommonTermsQuery.class, commonTermsQuery());
|
||||
map.put(BlendedTermQuery.class, blendedTermQuery());
|
||||
map.put(PhraseQuery.class, phraseQuery());
|
||||
map.put(MultiPhraseQuery.class, multiPhraseQuery());
|
||||
map.put(SpanTermQuery.class, spanTermQuery());
|
||||
map.put(SpanNearQuery.class, spanNearQuery());
|
||||
map.put(SpanOrQuery.class, spanOrQuery());
|
||||
|
@ -197,6 +200,21 @@ public final class QueryAnalyzer {
|
|||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> multiPhraseQuery() {
|
||||
return query -> {
|
||||
Term[][] terms = ((MultiPhraseQuery) query).getTermArrays();
|
||||
if (terms.length == 0) {
|
||||
return new Result(true, Collections.emptySet());
|
||||
}
|
||||
|
||||
Set<Term> bestTermArr = null;
|
||||
for (Term[] termArr : terms) {
|
||||
bestTermArr = selectTermListWithTheLongestShortestTerm(bestTermArr, new HashSet<>(Arrays.asList(termArr)));
|
||||
}
|
||||
return new Result(false, bestTermArr);
|
||||
};
|
||||
}
|
||||
|
||||
static Function<Query, Result> spanTermQuery() {
|
||||
return query -> {
|
||||
Term term = ((SpanTermQuery) query).getTerm();
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.search.ConstantScoreQuery;
|
|||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.SynonymQuery;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
|
@ -93,6 +94,21 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_multiPhraseQuery() {
|
||||
MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery.Builder()
|
||||
.add(new Term("_field", "_long_term"))
|
||||
.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_term")})
|
||||
.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_very_long_term")})
|
||||
.add(new Term[] {new Term("_field", "_very_long_term")})
|
||||
.build();
|
||||
Result result = analyze(multiPhraseQuery);
|
||||
assertThat(result.verified, is(false));
|
||||
List<Term> terms = new ArrayList<>(result.terms);
|
||||
assertThat(terms.size(), equalTo(1));
|
||||
assertThat(terms.get(0).field(), equalTo("_field"));
|
||||
assertThat(terms.get(0).bytes().utf8ToString(), equalTo("_very_long_term"));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_booleanQuery() {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
|
||||
|
|
Loading…
Reference in New Issue