Implement BlendedTermQuery#extractTerms to support highlighing.

some of the highlighters require term extraction to be implemented in
order to work. BlendedTermQuery doesn't implement the trivial extraction.

Closes #5246
This commit is contained in:
Simon Willnauer 2014-02-25 10:10:36 +01:00
parent bac09e2926
commit 4a48b93cf5
5 changed files with 84 additions and 16 deletions

View File

@ -27,6 +27,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
/**
* BlendedTermQuery can be used to unify term statistics across
@ -188,6 +189,13 @@ public abstract class BlendedTermQuery extends Query {
}
@Override
public void extractTerms(Set<Term> terms) {
for (Term term : this.terms) {
terms.add(term);
}
}
private volatile Term[] equalTerms = null;
private Term[] equalsTerms() {

View File

@ -21,12 +21,11 @@ package org.apache.lucene.search.vectorhighlight;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.XFilteredQuery;
@ -91,7 +90,10 @@ public class CustomFieldQuery extends FieldQuery {
flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries);
} else if (sourceQuery instanceof MultiPhraseQuery) {
MultiPhraseQuery q = ((MultiPhraseQuery) sourceQuery);
convertMultiPhraseQuery(0, new int[q.getTermArrays().size()] , q, q.getTermArrays(), q.getPositions(), reader, flatQueries);
convertMultiPhraseQuery(0, new int[q.getTermArrays().size()], q, q.getTermArrays(), q.getPositions(), reader, flatQueries);
} else if (sourceQuery instanceof BlendedTermQuery) {
final BlendedTermQuery blendedTermQuery = (BlendedTermQuery) sourceQuery;
flatten(blendedTermQuery.rewrite(reader), reader, flatQueries);
} else {
super.flatten(sourceQuery, reader, flatQueries);
}

View File

@ -19,10 +19,8 @@
package org.elasticsearch.search.highlight;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
@ -31,6 +29,9 @@ import org.elasticsearch.common.lucene.search.XFilteredQuery;
import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import java.io.IOException;
import java.util.Map;
public final class CustomQueryScorer extends QueryScorer {
public CustomQueryScorer(Query query, IndexReader reader, String field,
@ -86,6 +87,8 @@ public final class CustomQueryScorer extends QueryScorer {
} else if (query instanceof XFilteredQuery) {
query = ((XFilteredQuery) query).getQuery();
extract(query, terms);
} else if (query instanceof BlendedTermQuery) {
extractWeightedTerms(terms, query);
}
}

View File

@ -34,16 +34,19 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util._TestUtil;
import org.elasticsearch.test.ElasticsearchLuceneTestCase;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.*;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
/**
*/
public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
@Test
public void testBooleanQuery() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
@ -95,6 +98,7 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
}
@Test
public void testDismaxQuery() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
@ -165,6 +169,7 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
dir.close();
}
@Test
public void testBasics() {
final int iters = atLeast(5);
for (int j = 0; j < iters; j++) {
@ -201,4 +206,20 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
searcher.setSimilarity(similarity);
return searcher;
}
@Test
public void testExtractTerms() {
Set<Term> terms = new HashSet<Term>();
int num = atLeast(1);
for (int i = 0; i < num; i++) {
terms.add(new Term(_TestUtil.randomRealisticUnicodeString(random(), 1, 10), _TestUtil.randomRealisticUnicodeString(random(), 1, 10)));
}
BlendedTermQuery blendedTermQuery = random().nextBoolean() ? BlendedTermQuery.dismaxBlendedQuery(terms.toArray(new Term[0]), random().nextFloat()) :
BlendedTermQuery.booleanBlendedQuery(terms.toArray(new Term[0]), random().nextBoolean());
Set<Term> extracted = new HashSet<Term>();
blendedTermQuery.extractTerms(extracted);
assertThat(extracted.size(), equalTo(terms.size()));
assertThat(extracted, containsInAnyOrder(terms.toArray(new Term[0])));
}
}

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.highlight;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import org.apache.lucene.util.LuceneTestCase.Slow;
@ -54,6 +55,7 @@ import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.elasticsearch.test.hamcrest.RegexMatcher.matches;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.hamcrest.Matchers.*;
/**
@ -2065,14 +2067,13 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field2", 0, equalTo("The quick brown <field2>fox</field2> jumps over the lazy dog."));
assertHighlight(searchResponse, 0, "field2", 1, equalTo("The lazy red <field2>fox</field2> jumps over the quick dog."));
assertHighlight(searchResponse, 0, "field2", 2, 3, equalTo("The quick brown dog jumps over the lazy <field2>fox</field2>."));
logger.info("--> highlighting and searching on field1 and field2 via multi_match query");
final MultiMatchQueryBuilder mmquery = multiMatchQuery("fox", "field1", "field2").type(RandomPicks.randomFrom(getRandom(), MultiMatchQueryBuilder.Type.values()));
source = searchSource()
.query(multiMatchQuery("fox", "field1", "field2"))
.highlight(highlight()
.field(new HighlightBuilder.Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>"))
.field(new HighlightBuilder.Field("field2").requireFieldMatch(true).preTags("<field2>").postTags("</field2>")));
.query(mmquery)
.highlight(highlight().highlightQuery(randomBoolean() ? mmquery : null)
.field(new HighlightBuilder.Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>"))
.field(new HighlightBuilder.Field("field2").requireFieldMatch(true).preTags("<field2>").postTags("</field2>")));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHitCount(searchResponse, 1l);
@ -2085,6 +2086,39 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field2", 2, 3, equalTo("The quick brown dog jumps over the lazy <field2>fox</field2>."));
}
@Test
public void testMultiMatchQueryHighlight() throws IOException {
String[] highlighterTypes = new String[] {"fvh", "plain", "postings"};
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("_all").field("store", "yes").field("index_options", "offsets").endObject()
.startObject("properties")
.startObject("field1").field("type", "string").field("index_options", "offsets").field("term_vector", "with_positions_offsets").endObject()
.startObject("field2").field("type", "string").field("index_options", "offsets").field("term_vector", "with_positions_offsets").endObject()
.endObject()
.endObject().endObject();
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", mapping));
ensureGreen();
client().prepareIndex("test", "type1")
.setSource("field1", "The quick brown fox jumps over",
"field2", "The quick brown fox jumps over").get();
refresh();
final int iters = atLeast(20);
for (int i = 0; i < iters; i++) {
MultiMatchQueryBuilder.Type matchQueryType = rarely() ? null : RandomPicks.randomFrom(getRandom(), MultiMatchQueryBuilder.Type.values());
final MultiMatchQueryBuilder multiMatchQueryBuilder = multiMatchQuery("the quick brown fox", "field1", "field2").type(matchQueryType);
String type = rarely() ? null : RandomPicks.randomFrom(getRandom(),highlighterTypes);
SearchSourceBuilder source = searchSource()
.query(multiMatchQueryBuilder)
.highlight(highlight().highlightQuery(randomBoolean() ? multiMatchQueryBuilder : null).highlighterType(type)
.field(new Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>")));
logger.info("Running multi-match type: [" + matchQueryType + "] highlight with type: [" + type + "]");
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHitCount(searchResponse, 1l);
assertHighlight(searchResponse, 0, "field1", 0, anyOf(equalTo("<field1>The quick brown fox</field1> jumps over"),
equalTo("<field1>The</field1> <field1>quick</field1> <field1>brown</field1> <field1>fox</field1> jumps over")));
}
}
@Test
public void testPostingsHighlighterOrderByScore() throws Exception {
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
@ -2680,7 +2714,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
queryString("\"highlight words together\"").field("field1^100").autoGeneratePhraseQueries(true));
}
private <P extends QueryBuilder & BoostableQueryBuilder> void
private <P extends QueryBuilder & BoostableQueryBuilder<?>> void
phraseBoostTestCaseForClauses(String highlighterType, float boost, QueryBuilder terms, P phrase) {
Matcher<String> highlightedMatcher = Matchers.<String>either(containsString("<em>highlight words together</em>")).or(
containsString("<em>highlight</em> <em>words</em> <em>together</em>"));