percolator: Add support to extract terms from several types of span queries
This commit is contained in:
parent
26a0fb37a4
commit
7600dc9943
|
@ -37,6 +37,16 @@ import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanContainingQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanFirstQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||||
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanWithinQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
import org.elasticsearch.common.logging.LoggerMessageFormat;
|
import org.elasticsearch.common.logging.LoggerMessageFormat;
|
||||||
|
@ -95,7 +105,6 @@ public final class ExtractQueryTermsService {
|
||||||
* an UnsupportedQueryException is thrown.
|
* an UnsupportedQueryException is thrown.
|
||||||
*/
|
*/
|
||||||
static Set<Term> extractQueryTerms(Query query) {
|
static Set<Term> extractQueryTerms(Query query) {
|
||||||
// TODO: add support for span queries
|
|
||||||
if (query instanceof TermQuery) {
|
if (query instanceof TermQuery) {
|
||||||
return Collections.singleton(((TermQuery) query).getTerm());
|
return Collections.singleton(((TermQuery) query).getTerm());
|
||||||
} else if (query instanceof TermsQuery) {
|
} else if (query instanceof TermsQuery) {
|
||||||
|
@ -170,6 +179,27 @@ public final class ExtractQueryTermsService {
|
||||||
} else if (query instanceof BlendedTermQuery) {
|
} else if (query instanceof BlendedTermQuery) {
|
||||||
List<Term> terms = ((BlendedTermQuery) query).getTerms();
|
List<Term> terms = ((BlendedTermQuery) query).getTerms();
|
||||||
return new HashSet<>(terms);
|
return new HashSet<>(terms);
|
||||||
|
} else if (query instanceof SpanTermQuery) {
|
||||||
|
return Collections.singleton(((SpanTermQuery) query).getTerm());
|
||||||
|
} else if (query instanceof SpanNearQuery) {
|
||||||
|
Set<Term> bestClause = null;
|
||||||
|
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
|
||||||
|
for (SpanQuery clause : spanNearQuery.getClauses()) {
|
||||||
|
Set<Term> temp = extractQueryTerms(clause);
|
||||||
|
bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
|
||||||
|
}
|
||||||
|
return bestClause;
|
||||||
|
} else if (query instanceof SpanOrQuery) {
|
||||||
|
Set<Term> terms = new HashSet<>();
|
||||||
|
SpanOrQuery spanOrQuery = (SpanOrQuery) query;
|
||||||
|
for (SpanQuery clause : spanOrQuery.getClauses()) {
|
||||||
|
terms.addAll(extractQueryTerms(clause));
|
||||||
|
}
|
||||||
|
return terms;
|
||||||
|
} else if (query instanceof SpanFirstQuery) {
|
||||||
|
return extractQueryTerms(((SpanFirstQuery)query).getMatch());
|
||||||
|
} else if (query instanceof SpanNotQuery) {
|
||||||
|
return extractQueryTerms(((SpanNotQuery) query).getInclude());
|
||||||
} else {
|
} else {
|
||||||
throw new UnsupportedQueryException(query);
|
throw new UnsupportedQueryException(query);
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,12 +35,19 @@ import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TermRangeQuery;
|
import org.apache.lucene.search.TermRangeQuery;
|
||||||
|
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanFirstQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.index.mapper.ParseContext;
|
import org.elasticsearch.index.mapper.ParseContext;
|
||||||
import org.elasticsearch.test.ESTestCase;
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -242,6 +249,56 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
||||||
assertThat(result.get(1).text(), equalTo("_term2"));
|
assertThat(result.get(1).text(), equalTo("_term2"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testExtractQueryMetadata_spanTermQuery() {
|
||||||
|
// the following span queries aren't exposed in the query DSL and are therefore not supported:
|
||||||
|
// 1) SpanPositionRangeQuery
|
||||||
|
// 2) PayloadScoreQuery
|
||||||
|
// 3) SpanBoostQuery
|
||||||
|
|
||||||
|
// The following span queries can't be supported because of how these queries work:
|
||||||
|
// 1) SpanMultiTermQueryWrapper, not supported, because there is no support for MTQ typed queries yet.
|
||||||
|
// 2) SpanContainingQuery is a kind of range of spans, and we don't know what is between the little and big terms
|
||||||
|
// 3) SpanWithinQuery, same reason as SpanContainingQuery
|
||||||
|
// 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this
|
||||||
|
|
||||||
|
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||||
|
Set<Term> terms = ExtractQueryTermsService.extractQueryTerms(spanTermQuery1);
|
||||||
|
assertTermsEqual(terms, spanTermQuery1.getTerm());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractQueryMetadata_spanNearQuery() {
|
||||||
|
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||||
|
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
|
||||||
|
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
|
||||||
|
.addClause(spanTermQuery1).addClause(spanTermQuery2).build();
|
||||||
|
Set<Term> terms = ExtractQueryTermsService.extractQueryTerms(spanNearQuery);
|
||||||
|
assertTermsEqual(terms, spanTermQuery2.getTerm());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractQueryMetadata_spanOrQuery() {
|
||||||
|
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||||
|
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
|
||||||
|
SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
|
||||||
|
Set<Term> terms = ExtractQueryTermsService.extractQueryTerms(spanOrQuery);
|
||||||
|
assertTermsEqual(terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractQueryMetadata_spanFirstQuery() {
|
||||||
|
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||||
|
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
|
||||||
|
SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
|
||||||
|
Set<Term> terms = ExtractQueryTermsService.extractQueryTerms(spanFirstQuery);
|
||||||
|
assertTermsEqual(terms, spanTermQuery1.getTerm());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractQueryMetadata_spanNotQuery() {
|
||||||
|
SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
|
||||||
|
SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
|
||||||
|
SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
|
||||||
|
Set<Term> terms = ExtractQueryTermsService.extractQueryTerms(spanNotQuery);
|
||||||
|
assertTermsEqual(terms, spanTermQuery1.getTerm());
|
||||||
|
}
|
||||||
|
|
||||||
public void testExtractQueryMetadata_unsupportedQuery() {
|
public void testExtractQueryMetadata_unsupportedQuery() {
|
||||||
TermRangeQuery termRangeQuery = new TermRangeQuery("_field", null, null, true, false);
|
TermRangeQuery termRangeQuery = new TermRangeQuery("_field", null, null, true, false);
|
||||||
|
|
||||||
|
@ -330,4 +387,8 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
|
||||||
assertThat(((TermQuery) booleanQuery.clauses().get(i).getQuery()).getTerm().bytes().utf8ToString(), equalTo(expectedValue));
|
assertThat(((TermQuery) booleanQuery.clauses().get(i).getQuery()).getTerm().bytes().utf8ToString(), equalTo(expectedValue));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void assertTermsEqual(Set<Term> actual, Term... expected) {
|
||||||
|
assertEquals(new HashSet<>(Arrays.asList(expected)), actual);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,10 @@ import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.search.WildcardQuery;
|
import org.apache.lucene.search.WildcardQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.elasticsearch.common.bytes.BytesArray;
|
import org.elasticsearch.common.bytes.BytesArray;
|
||||||
import org.elasticsearch.index.mapper.ParseContext;
|
import org.elasticsearch.index.mapper.ParseContext;
|
||||||
|
@ -194,6 +198,8 @@ public class PercolatorQueryTests extends ESTestCase {
|
||||||
query = new WildcardQuery(new Term("field", id + "*"));
|
query = new WildcardQuery(new Term("field", id + "*"));
|
||||||
} else if (randomBoolean()) {
|
} else if (randomBoolean()) {
|
||||||
query = new CustomQuery(new Term("field", id + "*"));
|
query = new CustomQuery(new Term("field", id + "*"));
|
||||||
|
} else if (randomBoolean()) {
|
||||||
|
query = new SpanTermQuery(new Term("field", id));
|
||||||
} else {
|
} else {
|
||||||
query = new TermQuery(new Term("field", id));
|
query = new TermQuery(new Term("field", id));
|
||||||
}
|
}
|
||||||
|
@ -223,6 +229,27 @@ public class PercolatorQueryTests extends ESTestCase {
|
||||||
new Term("field", "brown"), new Term("field", "fox")}, false);
|
new Term("field", "brown"), new Term("field", "fox")}, false);
|
||||||
addPercolatorQuery("_id2", blendedTermQuery);
|
addPercolatorQuery("_id2", blendedTermQuery);
|
||||||
|
|
||||||
|
SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("field", true)
|
||||||
|
.addClause(new SpanTermQuery(new Term("field", "quick")))
|
||||||
|
.addClause(new SpanTermQuery(new Term("field", "brown")))
|
||||||
|
.addClause(new SpanTermQuery(new Term("field", "fox")))
|
||||||
|
.build();
|
||||||
|
addPercolatorQuery("_id3", spanNearQuery);
|
||||||
|
|
||||||
|
SpanNearQuery spanNearQuery2 = new SpanNearQuery.Builder("field", true)
|
||||||
|
.addClause(new SpanTermQuery(new Term("field", "the")))
|
||||||
|
.addClause(new SpanTermQuery(new Term("field", "lazy")))
|
||||||
|
.addClause(new SpanTermQuery(new Term("field", "doc")))
|
||||||
|
.build();
|
||||||
|
SpanOrQuery spanOrQuery = new SpanOrQuery(
|
||||||
|
spanNearQuery,
|
||||||
|
spanNearQuery2
|
||||||
|
);
|
||||||
|
addPercolatorQuery("_id4", spanOrQuery);
|
||||||
|
|
||||||
|
SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, spanNearQuery);
|
||||||
|
addPercolatorQuery("_id5", spanNotQuery);
|
||||||
|
|
||||||
indexWriter.close();
|
indexWriter.close();
|
||||||
directoryReader = DirectoryReader.open(directory);
|
directoryReader = DirectoryReader.open(directory);
|
||||||
IndexSearcher shardSearcher = newSearcher(directoryReader);
|
IndexSearcher shardSearcher = newSearcher(directoryReader);
|
||||||
|
|
|
@ -33,6 +33,9 @@ import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.percolatorQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.percolatorQuery;
|
||||||
|
import static org.elasticsearch.index.query.QueryBuilders.spanNearQuery;
|
||||||
|
import static org.elasticsearch.index.query.QueryBuilders.spanNotQuery;
|
||||||
|
import static org.elasticsearch.index.query.QueryBuilders.spanTermQuery;
|
||||||
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
|
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
||||||
|
@ -99,6 +102,44 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
|
||||||
.setSource(jsonBuilder().startObject().field("query", multiMatchQuery("quick brown fox", "field1", "field2")
|
.setSource(jsonBuilder().startObject().field("query", multiMatchQuery("quick brown fox", "field1", "field2")
|
||||||
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).endObject())
|
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).endObject())
|
||||||
.get();
|
.get();
|
||||||
|
client().prepareIndex("test", PercolatorFieldMapper.TYPE_NAME, "3")
|
||||||
|
.setSource(jsonBuilder().startObject().field("query",
|
||||||
|
spanNearQuery(spanTermQuery("field1", "quick"), 0)
|
||||||
|
.clause(spanTermQuery("field1", "brown"))
|
||||||
|
.clause(spanTermQuery("field1", "fox"))
|
||||||
|
.inOrder(true)
|
||||||
|
).endObject())
|
||||||
|
.get();
|
||||||
|
client().admin().indices().prepareRefresh().get();
|
||||||
|
|
||||||
|
client().prepareIndex("test", PercolatorFieldMapper.TYPE_NAME, "4")
|
||||||
|
.setSource(jsonBuilder().startObject().field("query",
|
||||||
|
spanNotQuery(
|
||||||
|
spanNearQuery(spanTermQuery("field1", "quick"), 0)
|
||||||
|
.clause(spanTermQuery("field1", "brown"))
|
||||||
|
.clause(spanTermQuery("field1", "fox"))
|
||||||
|
.inOrder(true),
|
||||||
|
spanNearQuery(spanTermQuery("field1", "the"), 0)
|
||||||
|
.clause(spanTermQuery("field1", "lazy"))
|
||||||
|
.clause(spanTermQuery("field1", "dog"))
|
||||||
|
.inOrder(true)).dist(2)
|
||||||
|
).endObject())
|
||||||
|
.get();
|
||||||
|
|
||||||
|
// doesn't match
|
||||||
|
client().prepareIndex("test", PercolatorFieldMapper.TYPE_NAME, "5")
|
||||||
|
.setSource(jsonBuilder().startObject().field("query",
|
||||||
|
spanNotQuery(
|
||||||
|
spanNearQuery(spanTermQuery("field1", "quick"), 0)
|
||||||
|
.clause(spanTermQuery("field1", "brown"))
|
||||||
|
.clause(spanTermQuery("field1", "fox"))
|
||||||
|
.inOrder(true),
|
||||||
|
spanNearQuery(spanTermQuery("field1", "the"), 0)
|
||||||
|
.clause(spanTermQuery("field1", "lazy"))
|
||||||
|
.clause(spanTermQuery("field1", "dog"))
|
||||||
|
.inOrder(true)).dist(3)
|
||||||
|
).endObject())
|
||||||
|
.get();
|
||||||
client().admin().indices().prepareRefresh().get();
|
client().admin().indices().prepareRefresh().get();
|
||||||
|
|
||||||
BytesReference source = jsonBuilder().startObject()
|
BytesReference source = jsonBuilder().startObject()
|
||||||
|
@ -108,9 +149,11 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
|
||||||
SearchResponse response = client().prepareSearch()
|
SearchResponse response = client().prepareSearch()
|
||||||
.setQuery(percolatorQuery("type", source))
|
.setQuery(percolatorQuery("type", source))
|
||||||
.get();
|
.get();
|
||||||
assertHitCount(response, 2);
|
assertHitCount(response, 4);
|
||||||
assertThat(response.getHits().getAt(0).getId(), equalTo("1"));
|
assertThat(response.getHits().getAt(0).getId(), equalTo("1"));
|
||||||
assertThat(response.getHits().getAt(1).getId(), equalTo("2"));
|
assertThat(response.getHits().getAt(1).getId(), equalTo("2"));
|
||||||
|
assertThat(response.getHits().getAt(2).getId(), equalTo("3"));
|
||||||
|
assertThat(response.getHits().getAt(3).getId(), equalTo("4"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPercolatorQueryWithHighlighting() throws Exception {
|
public void testPercolatorQueryWithHighlighting() throws Exception {
|
||||||
|
|
Loading…
Reference in New Issue