Highlighting: Plain highlighter to use analyzer defined on a document level when available.

At the moment the plain highlighter only uses the analyzer defined at the type
level. However, during indexing it is possible to select an analyzer at the
per-document level, for example by mapping '_analyzer' to another field that
contains the required analyzer name. This commit attempts to make sure that
highlighting works correctly in this scenario.

Closes #5497
This commit is contained in:
mateusz_kaczynski 2014-05-13 13:58:52 +00:00 committed by Adrien Grand
parent 5fdb35fc54
commit e97a381db2
5 changed files with 85 additions and 14 deletions

View File

@ -386,6 +386,10 @@ public class DocumentMapper implements ToXContent {
return rootMapper(SourceFieldMapper.class);
}
/**
 * Returns whatever {@code rootMapper(AnalyzerMapper.class)} yields for this document mapper.
 * NOTE(review): {@code rootMapper} is defined outside this view; presumably it looks up the
 * root mapper registered for the given class and may return {@code null} if none exists — confirm.
 */
public AnalyzerMapper analyzerMapper() {
return rootMapper(AnalyzerMapper.class);
}
/**
 * Returns whatever {@code rootMapper(AllFieldMapper.class)} yields for this document mapper.
 * NOTE(review): {@code rootMapper} is defined outside this view; presumably it looks up the
 * root mapper registered for the given class — confirm.
 */
public AllFieldMapper allFieldMapper() {
return rootMapper(AllFieldMapper.class);
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexableField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.search.highlight.HighlighterContext;
import java.io.IOException;
import java.util.List;
@ -129,6 +130,26 @@ public class AnalyzerMapper implements Mapper, InternalMapper, RootMapper {
return false;
}
/**
 * Resolves the analyzer to use while highlighting the current hit and stores it on the
 * given highlighter context so that later calls with the same context reuse it.
 * <p>
 * Resolution order:
 * <ol>
 *   <li>the analyzer already stored on {@code context}, if any;</li>
 *   <li>when {@code path} is set, the analyzer whose name is read from the hit's source
 *       at {@code path};</li>
 *   <li>otherwise, or when the lookup above yields nothing, the index analyzer of the
 *       document mapper for the hit's type.</li>
 * </ol>
 *
 * @param context highlighter context for the hit being processed; the resolved analyzer
 *                is written back to it via {@code context.analyzer(Analyzer)}
 * @return the resolved analyzer
 */
public Analyzer setAnalyzer(HighlighterContext context){
    Analyzer cached = context.analyzer();
    if (cached != null) {
        return cached;
    }

    Analyzer analyzer = null;

    if (path != null) {
        // The value comes straight from the document source, so it may be absent or not a
        // string. Avoid the unchecked (String) cast and never look up a null name; any value
        // that does not name a known analyzer simply falls through to the type-level default.
        Object analyzerName = context.context.lookup().source().extractValue(path);
        if (analyzerName != null) {
            analyzer = context.context.mapperService().analysisService().analyzer(analyzerName.toString());
        }
    }

    if (analyzer == null) {
        analyzer = context.context.mapperService().documentMapper(context.hitContext.hit().type()).mappers().indexAnalyzer();
    }

    context.analyzer(analyzer);
    return analyzer;
}
@Override
public void parse(ParseContext context) throws IOException {
// No statements: this implementation does nothing when invoked.
// NOTE(review): presumably the analyzer is handled in another parse hook outside this view — confirm.
}

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.highlight;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchSubPhase;
@ -34,6 +35,7 @@ public class HighlighterContext {
public final SearchContext context;
public final FetchSubPhase.HitContext hitContext;
public final HighlightQuery query;
private Analyzer analyzer;
public HighlighterContext(String fieldName, SearchContextHighlight.Field field, FieldMapper<?> mapper, SearchContext context,
FetchSubPhase.HitContext hitContext, HighlightQuery query) {
@ -68,4 +70,12 @@ public class HighlighterContext {
return query;
}
}
/**
 * Returns the analyzer currently stored on this context.
 * NOTE(review): the field has no initializer in the visible code, so this is presumably
 * {@code null} until {@code analyzer(Analyzer)} has been called — confirm against the constructor.
 */
public Analyzer analyzer() {
return this.analyzer;
}
/**
 * Stores the given analyzer on this context, replacing any previously stored one.
 *
 * @param analyzer the analyzer to store; returned by subsequent calls to {@code analyzer()}
 */
public void analyzer(Analyzer analyzer) {
this.analyzer = analyzer;
}
}

View File

@ -30,6 +30,7 @@ import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.internal.AnalyzerMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
@ -99,12 +100,16 @@ public class PlainHighlighter implements Highlighter {
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
AnalyzerMapper analyzerMapper = context.mapperService().documentMapper(hitContext.hit().type()).analyzerMapper();
Analyzer analyzer = analyzerMapper.setAnalyzer(highlighterContext);
try {
textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
for (Object textToHighlight : textsToHighlight) {
String text = textToHighlight.toString();
Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
TokenStream tokenStream = analyzer.tokenStream(mapper.names().indexName(), text);
if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
// can't perform highlighting if the stream has no terms (binary token stream) or no offsets
@ -151,7 +156,6 @@ public class PlainHighlighter implements Highlighter {
if (noMatchSize > 0 && textsToHighlight.size() > 0) {
// Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
String fieldContents = textsToHighlight.get(0).toString();
Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
int end;
try {
end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer.tokenStream(mapper.names().indexName(), fieldContents));

View File

@ -63,7 +63,7 @@ import static org.hamcrest.Matchers.*;
*
*/
public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
@Test
// see #3486
public void testHighTermFrequencyDoc() throws ElasticsearchException, IOException {
@ -137,8 +137,8 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
SearchResponse search = client().prepareSearch("test").setTypes("test").setQuery(matchQuery("name.autocomplete", "deut tel").operator(Operator.OR)).addHighlightedField("name.autocomplete").execute().actionGet();
assertHighlight(search, 0, "name.autocomplete", 0, equalTo("ARCO<em>TEL</em> Ho<em>tel</em>s <em>Deut</em>schland"));
}
@Test
@Test
public void testMultiPhraseCutoff() throws ElasticsearchException, IOException {
/*
* MultiPhraseQuery can literally kill an entire node if there are too many terms in the
@ -169,7 +169,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: <em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> <em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"));
}
@Test
public void testNgramHighlightingPreLucene42() throws ElasticsearchException, IOException {
@ -237,7 +237,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(search, 1, "name2", 0, anyOf(equalTo("<em>logica</em>cmg ehemals avinci - the know how company"),
equalTo("avinci, unilog avinci, <em>logica</em>cmg, <em>logica</em>")));
}
@Test
public void testNgramHighlighting() throws ElasticsearchException, IOException {
assertAcked(prepareCreate("test")
@ -264,23 +264,23 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
ensureGreen();
SearchResponse search = client().prepareSearch().setQuery(matchQuery("name", "logica m")).addHighlightedField("name").get();
assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"));
search = client().prepareSearch().setQuery(matchQuery("name", "logica ma")).addHighlightedField("name").get();
assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"));
search = client().prepareSearch().setQuery(matchQuery("name", "logica")).addHighlightedField("name").get();
assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehemals avinci - the know how company"));
search = client().prepareSearch().setQuery(matchQuery("name2", "logica m")).addHighlightedField("name2").get();
assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"));
search = client().prepareSearch().setQuery(matchQuery("name2", "logica ma")).addHighlightedField("name2").get();
assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"));
search = client().prepareSearch().setQuery(matchQuery("name2", "logica")).addHighlightedField("name2").get();
assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> ehemals avinci - the know how company"));
}
@Test
public void testEnsureNoNegativeOffsets() throws Exception {
assertAcked(prepareCreate("test")
@ -314,7 +314,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(search, 0, "no_long_term", 0, 1, equalTo("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and"));
}
@Test
public void testSourceLookupHighlightingUsingPlainHighlighter() throws Exception {
assertAcked(prepareCreate("test")
@ -656,6 +656,38 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
}
@Test
public void testPlainHighlighterDocumentAnalyzer() throws Exception {
    // Mapping: "_analyzer" points at the "language_analyzer" field of each document,
    // and "text" is a plain string field with no analyzer of its own.
    client().admin().indices().prepareCreate("test")
            .addMapping("type1", XContentFactory.jsonBuilder()
                    .startObject()
                        .startObject("type1")
                            .startObject("_analyzer")
                                .field("path", "language_analyzer")
                            .endObject()
                            .startObject("properties")
                                .startObject("language_analyzer")
                                    .field("type", "string")
                                    .field("index", "not_analyzed")
                                .endObject()
                                .startObject("text")
                                    .field("type", "string")
                                .endObject()
                            .endObject()
                        .endObject()
                    .endObject())
            .execute().actionGet();
    ensureYellow();

    // Single document that names "english" as its analyzer.
    index("test", "type1", "1",
            "language_analyzer", "english",
            "text", "Look at me, I'm eating cars.");
    refresh();

    HighlightBuilder.Field highlightedText = new HighlightBuilder.Field("text")
            .preTags("<1>")
            .postTags("</1>")
            .requireFieldMatch(true);

    // The query term is "car" while the text contains "cars"; the assertion below expects
    // "cars" to be highlighted, which presumably relies on the per-document analyzer.
    SearchResponse searchResponse = client().prepareSearch("test")
            .setQuery(QueryBuilders.matchQuery("text", "car"))
            .addHighlightedField(highlightedText)
            .get();

    assertHighlight(searchResponse, 0, "text", 0, 1, equalTo("Look at me, I'm eating <1>cars</1>."));
}
@Test
public void testFastVectorHighlighter() throws Exception {
assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping()));
@ -1272,7 +1304,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(response, 0, "tags", 0, equalTo("this is a really long <em>tag</em> i would like to highlight"));
assertHighlight(response, 0, "tags", 1, 2, equalTo("here is another one that is very long and has the <em>tag</em> token near the end"));
}
@Test
public void testBoostingQuery() {
createIndex("test");
@ -1291,7 +1323,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick <x>brown</x> fox jumps over the lazy dog"));
}
@Test
public void testBoostingQueryTermVector() throws ElasticsearchException, IOException {
assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping()));