Merge pull request #15516 from jpountz/fix/highlighter_extract

Fix spans extraction to not also include individual terms.
This commit is contained in:
Adrien Grand 2015-12-17 17:12:18 +01:00
commit 1e0f776af3
3 changed files with 46 additions and 4 deletions

View File

@ -82,7 +82,7 @@ public final class CustomQueryScorer extends QueryScorer {
} else if (query instanceof FiltersFunctionScoreQuery) { } else if (query instanceof FiltersFunctionScoreQuery) {
query = ((FiltersFunctionScoreQuery) query).getSubQuery(); query = ((FiltersFunctionScoreQuery) query).getSubQuery();
extract(query, query.getBoost(), terms); extract(query, query.getBoost(), terms);
} else { } else if (terms.isEmpty()) {
extractWeightedTerms(terms, query, query.getBoost()); extractWeightedTerms(terms, query, query.getBoost());
} }
} }

View File

@ -1557,7 +1557,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
.fragmenter("simple"))).get(); .fragmenter("simple"))).get();
assertHighlight(response, 0, "tags", 0, equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight")); assertHighlight(response, 0, "tags", 0, equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
assertHighlight(response, 0, "tags", 1, 2, equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the <em>tag</em> token near the end")); assertHighlight(response, 0, "tags", 1, 2, equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));
response = client().prepareSearch("test") response = client().prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQuery.Type.PHRASE)) .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQuery.Type.PHRASE))
@ -1566,7 +1566,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
.fragmenter("span"))).get(); .fragmenter("span"))).get();
assertHighlight(response, 0, "tags", 0, equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight")); assertHighlight(response, 0, "tags", 0, equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
assertHighlight(response, 0, "tags", 1, 2, equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the <em>tag</em> token near the end")); assertHighlight(response, 0, "tags", 1, 2, equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));
assertFailures(client().prepareSearch("test") assertFailures(client().prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQuery.Type.PHRASE)) .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQuery.Type.PHRASE))
@ -2062,7 +2062,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
searchResponse = client().search(searchRequest("test").source(source)).actionGet(); searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy <xxx>quick</xxx> dog")); assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy quick dog"));
} }
public void testPostingsHighlighterMultipleFields() throws Exception { public void testPostingsHighlighterMultipleFields() throws Exception {

View File

@ -0,0 +1,42 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.highlight;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.util.LuceneTestCase;
public class PlainHighlighterTests extends LuceneTestCase {
public void testHighlightPhrase() throws Exception {
Query query = new PhraseQuery.Builder()
.add(new Term("field", "foo"))
.add(new Term("field", "bar"))
.build();
QueryScorer queryScorer = new CustomQueryScorer(query);
org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(queryScorer);
String[] frags = highlighter.getBestFragments(new MockAnalyzer(random()), "field", "bar foo bar foo", 10);
assertArrayEquals(new String[] {"bar <B>foo</B> <B>bar</B> foo"}, frags);
}
}