mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-26 06:46:10 +00:00
Implement BlendedTermQuery#extractTerms to support highlighing.
some of the highlighters require term extraction to be implemented in order to work. BlendedTermQuery doesn't implement the trivial extraction. Closes #5246
This commit is contained in:
parent
bac09e2926
commit
4a48b93cf5
@ -27,6 +27,7 @@ import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* BlendedTermQuery can be used to unify term statistics across
|
||||
@ -188,6 +189,13 @@ public abstract class BlendedTermQuery extends Query {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractTerms(Set<Term> terms) {
|
||||
for (Term term : this.terms) {
|
||||
terms.add(term);
|
||||
}
|
||||
}
|
||||
|
||||
private volatile Term[] equalTerms = null;
|
||||
|
||||
private Term[] equalsTerms() {
|
||||
|
@ -21,12 +21,11 @@ package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.BlendedTermQuery;
|
||||
import org.apache.lucene.queries.FilterClause;
|
||||
import org.apache.lucene.queries.TermFilter;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
||||
import org.elasticsearch.common.lucene.search.XBooleanFilter;
|
||||
import org.elasticsearch.common.lucene.search.XFilteredQuery;
|
||||
@ -91,7 +90,10 @@ public class CustomFieldQuery extends FieldQuery {
|
||||
flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries);
|
||||
} else if (sourceQuery instanceof MultiPhraseQuery) {
|
||||
MultiPhraseQuery q = ((MultiPhraseQuery) sourceQuery);
|
||||
convertMultiPhraseQuery(0, new int[q.getTermArrays().size()] , q, q.getTermArrays(), q.getPositions(), reader, flatQueries);
|
||||
convertMultiPhraseQuery(0, new int[q.getTermArrays().size()], q, q.getTermArrays(), q.getPositions(), reader, flatQueries);
|
||||
} else if (sourceQuery instanceof BlendedTermQuery) {
|
||||
final BlendedTermQuery blendedTermQuery = (BlendedTermQuery) sourceQuery;
|
||||
flatten(blendedTermQuery.rewrite(reader), reader, flatQueries);
|
||||
} else {
|
||||
super.flatten(sourceQuery, reader, flatQueries);
|
||||
}
|
||||
|
@ -19,10 +19,8 @@
|
||||
|
||||
package org.elasticsearch.search.highlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.queries.BlendedTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.highlight.QueryScorer;
|
||||
import org.apache.lucene.search.highlight.WeightedSpanTerm;
|
||||
@ -31,6 +29,9 @@ import org.elasticsearch.common.lucene.search.XFilteredQuery;
|
||||
import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
|
||||
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public final class CustomQueryScorer extends QueryScorer {
|
||||
|
||||
public CustomQueryScorer(Query query, IndexReader reader, String field,
|
||||
@ -86,6 +87,8 @@ public final class CustomQueryScorer extends QueryScorer {
|
||||
} else if (query instanceof XFilteredQuery) {
|
||||
query = ((XFilteredQuery) query).getQuery();
|
||||
extract(query, terms);
|
||||
} else if (query instanceof BlendedTermQuery) {
|
||||
extractWeightedTerms(terms, query);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -34,16 +34,19 @@ import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.elasticsearch.test.ElasticsearchLuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
import static org.hamcrest.Matchers.containsInAnyOrder;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testBooleanQuery() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
|
||||
@ -95,6 +98,7 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDismaxQuery() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
|
||||
@ -165,6 +169,7 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
|
||||
dir.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasics() {
|
||||
final int iters = atLeast(5);
|
||||
for (int j = 0; j < iters; j++) {
|
||||
@ -201,4 +206,20 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
|
||||
searcher.setSimilarity(similarity);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtractTerms() {
|
||||
Set<Term> terms = new HashSet<Term>();
|
||||
int num = atLeast(1);
|
||||
for (int i = 0; i < num; i++) {
|
||||
terms.add(new Term(_TestUtil.randomRealisticUnicodeString(random(), 1, 10), _TestUtil.randomRealisticUnicodeString(random(), 1, 10)));
|
||||
}
|
||||
|
||||
BlendedTermQuery blendedTermQuery = random().nextBoolean() ? BlendedTermQuery.dismaxBlendedQuery(terms.toArray(new Term[0]), random().nextFloat()) :
|
||||
BlendedTermQuery.booleanBlendedQuery(terms.toArray(new Term[0]), random().nextBoolean());
|
||||
Set<Term> extracted = new HashSet<Term>();
|
||||
blendedTermQuery.extractTerms(extracted);
|
||||
assertThat(extracted.size(), equalTo(terms.size()));
|
||||
assertThat(extracted, containsInAnyOrder(terms.toArray(new Term[0])));
|
||||
}
|
||||
}
|
||||
|
@ -18,6 +18,7 @@
|
||||
*/
|
||||
package org.elasticsearch.search.highlight;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.collect.Iterables;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
@ -54,6 +55,7 @@ import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
|
||||
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
|
||||
import static org.elasticsearch.test.hamcrest.RegexMatcher.matches;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
@ -2065,14 +2067,13 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
|
||||
assertHighlight(searchResponse, 0, "field2", 0, equalTo("The quick brown <field2>fox</field2> jumps over the lazy dog."));
|
||||
assertHighlight(searchResponse, 0, "field2", 1, equalTo("The lazy red <field2>fox</field2> jumps over the quick dog."));
|
||||
assertHighlight(searchResponse, 0, "field2", 2, 3, equalTo("The quick brown dog jumps over the lazy <field2>fox</field2>."));
|
||||
|
||||
logger.info("--> highlighting and searching on field1 and field2 via multi_match query");
|
||||
final MultiMatchQueryBuilder mmquery = multiMatchQuery("fox", "field1", "field2").type(RandomPicks.randomFrom(getRandom(), MultiMatchQueryBuilder.Type.values()));
|
||||
source = searchSource()
|
||||
.query(multiMatchQuery("fox", "field1", "field2"))
|
||||
.highlight(highlight()
|
||||
.field(new HighlightBuilder.Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>"))
|
||||
.field(new HighlightBuilder.Field("field2").requireFieldMatch(true).preTags("<field2>").postTags("</field2>")));
|
||||
|
||||
.query(mmquery)
|
||||
.highlight(highlight().highlightQuery(randomBoolean() ? mmquery : null)
|
||||
.field(new HighlightBuilder.Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>"))
|
||||
.field(new HighlightBuilder.Field("field2").requireFieldMatch(true).preTags("<field2>").postTags("</field2>")));
|
||||
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
@ -2085,6 +2086,39 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
|
||||
assertHighlight(searchResponse, 0, "field2", 2, 3, equalTo("The quick brown dog jumps over the lazy <field2>fox</field2>."));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiMatchQueryHighlight() throws IOException {
|
||||
String[] highlighterTypes = new String[] {"fvh", "plain", "postings"};
|
||||
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||
.startObject("_all").field("store", "yes").field("index_options", "offsets").endObject()
|
||||
.startObject("properties")
|
||||
.startObject("field1").field("type", "string").field("index_options", "offsets").field("term_vector", "with_positions_offsets").endObject()
|
||||
.startObject("field2").field("type", "string").field("index_options", "offsets").field("term_vector", "with_positions_offsets").endObject()
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", mapping));
|
||||
ensureGreen();
|
||||
client().prepareIndex("test", "type1")
|
||||
.setSource("field1", "The quick brown fox jumps over",
|
||||
"field2", "The quick brown fox jumps over").get();
|
||||
refresh();
|
||||
final int iters = atLeast(20);
|
||||
for (int i = 0; i < iters; i++) {
|
||||
MultiMatchQueryBuilder.Type matchQueryType = rarely() ? null : RandomPicks.randomFrom(getRandom(), MultiMatchQueryBuilder.Type.values());
|
||||
final MultiMatchQueryBuilder multiMatchQueryBuilder = multiMatchQuery("the quick brown fox", "field1", "field2").type(matchQueryType);
|
||||
String type = rarely() ? null : RandomPicks.randomFrom(getRandom(),highlighterTypes);
|
||||
SearchSourceBuilder source = searchSource()
|
||||
.query(multiMatchQueryBuilder)
|
||||
.highlight(highlight().highlightQuery(randomBoolean() ? multiMatchQueryBuilder : null).highlighterType(type)
|
||||
.field(new Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>")));
|
||||
logger.info("Running multi-match type: [" + matchQueryType + "] highlight with type: [" + type + "]");
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
assertHighlight(searchResponse, 0, "field1", 0, anyOf(equalTo("<field1>The quick brown fox</field1> jumps over"),
|
||||
equalTo("<field1>The</field1> <field1>quick</field1> <field1>brown</field1> <field1>fox</field1> jumps over")));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostingsHighlighterOrderByScore() throws Exception {
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
@ -2680,7 +2714,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
|
||||
queryString("\"highlight words together\"").field("field1^100").autoGeneratePhraseQueries(true));
|
||||
}
|
||||
|
||||
private <P extends QueryBuilder & BoostableQueryBuilder> void
|
||||
private <P extends QueryBuilder & BoostableQueryBuilder<?>> void
|
||||
phraseBoostTestCaseForClauses(String highlighterType, float boost, QueryBuilder terms, P phrase) {
|
||||
Matcher<String> highlightedMatcher = Matchers.<String>either(containsString("<em>highlight words together</em>")).or(
|
||||
containsString("<em>highlight</em> <em>words</em> <em>together</em>"));
|
||||
|
Loading…
x
Reference in New Issue
Block a user