percolator: Add support to extract terms from TermsQuery

This commit is contained in:
Martijn van Groningen 2016-03-24 10:18:11 +01:00
parent e5074e2b1b
commit 9399f673d1
2 changed files with 31 additions and 1 deletion

View File

@ -23,6 +23,7 @@ import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
@ -92,10 +93,17 @@ public final class ExtractQueryTermsService {
* an UnsupportedQueryException is thrown. * an UnsupportedQueryException is thrown.
*/ */
static Set<Term> extractQueryTerms(Query query) { static Set<Term> extractQueryTerms(Query query) {
// TODO: add support for the TermsQuery when it has methods to access the actual terms it encapsulates
// TODO: add support for span queries // TODO: add support for span queries
if (query instanceof TermQuery) { if (query instanceof TermQuery) {
return Collections.singleton(((TermQuery) query).getTerm()); return Collections.singleton(((TermQuery) query).getTerm());
} else if (query instanceof TermsQuery) {
Set<Term> terms = new HashSet<>();
TermsQuery termsQuery = (TermsQuery) query;
PrefixCodedTerms.TermIterator iterator = termsQuery.getTermData().iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
terms.add(new Term(iterator.field(), term));
}
return terms;
} else if (query instanceof PhraseQuery) { } else if (query instanceof PhraseQuery) {
Term[] terms = ((PhraseQuery) query).getTerms(); Term[] terms = ((PhraseQuery) query).getTerms();
if (terms.length == 0) { if (terms.length == 0) {

View File

@ -39,6 +39,7 @@ import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@ -102,6 +103,27 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes())); assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes()));
} }
/**
 * Checks that the terms wrapped by a {@link TermsQuery} are extracted, first for terms that
 * share a single field and then for terms that belong to different fields.
 */
public void testExtractQueryMetadata_termsQuery() {
    // all terms in the same field
    TermsQuery sameFieldQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2"));
    List<Term> sameFieldTerms = new ArrayList<>(ExtractQueryTermsService.extractQueryTerms(sameFieldQuery));
    // sort so the positional assertions below do not depend on the iteration order of the returned Set
    Collections.sort(sameFieldTerms);
    assertThat(sameFieldTerms.size(), equalTo(2));
    Term firstSameField = sameFieldTerms.get(0);
    assertThat(firstSameField.field(), equalTo("_field"));
    assertThat(firstSameField.text(), equalTo("_term1"));
    Term secondSameField = sameFieldTerms.get(1);
    assertThat(secondSameField.field(), equalTo("_field"));
    assertThat(secondSameField.text(), equalTo("_term2"));

    // test with different fields
    TermsQuery multiFieldQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2"));
    List<Term> multiFieldTerms = new ArrayList<>(ExtractQueryTermsService.extractQueryTerms(multiFieldQuery));
    Collections.sort(multiFieldTerms);
    assertThat(multiFieldTerms.size(), equalTo(2));
    Term firstMultiField = multiFieldTerms.get(0);
    assertThat(firstMultiField.field(), equalTo("_field1"));
    assertThat(firstMultiField.text(), equalTo("_term1"));
    Term secondMultiField = multiFieldTerms.get(1);
    assertThat(secondMultiField.field(), equalTo("_field2"));
    assertThat(secondMultiField.text(), equalTo("_term2"));
}
public void testExtractQueryMetadata_phraseQuery() { public void testExtractQueryMetadata_phraseQuery() {
PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2"); PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2");
List<Term> terms = new ArrayList<>(ExtractQueryTermsService.extractQueryTerms(phraseQuery)); List<Term> terms = new ArrayList<>(ExtractQueryTermsService.extractQueryTerms(phraseQuery));