Merge pull request #11364 from brwe/highlighter-wildcard
Wildcard field names in highlighting should only return fields that can be highlighted
commit ceb0782ebd
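In short: each Highlighter now exposes canHighlight(FieldMapper), HighlightPhase picks the highlighter for an unconfigured field by trying "fvh", "postings", and "plain" in that order, and fields that only matched a wildcard pattern are skipped when they cannot be highlighted instead of failing the whole request. The following self-contained Java sketch models that logic; the nested Highlighter interface, the String-based fields, and the "_uid" example are simplifications invented for illustration, not the Elasticsearch API:

    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class WildcardHighlightModel {

        // Simplified stand-in for org.elasticsearch.search.highlight.Highlighter.
        interface Highlighter {
            boolean canHighlight(String field);
        }

        public static void main(String[] args) {
            // Hypothetical capabilities: only "text" carries term vectors with
            // positions and offsets; the plain highlighter can always run.
            Map<String, Highlighter> highlighters = new LinkedHashMap<>();
            highlighters.put("fvh", field -> field.equals("text"));
            highlighters.put("postings", field -> field.equals("text"));
            highlighters.put("plain", field -> true);
            List<String> precedence = Arrays.asList("fvh", "postings", "plain");

            // 1) No explicit highlighter type: the first capable one in
            //    precedence order wins.
            for (String field : Arrays.asList("text", "_uid")) {
                String type = null;
                for (String candidate : precedence) {
                    if (highlighters.get(candidate).canHighlight(field)) {
                        type = candidate;
                        break;
                    }
                }
                System.out.println(field + " -> " + type); // text -> fvh, _uid -> plain
            }

            // 2) Explicit type requested for a wildcard pattern: fields the
            //    highlighter cannot handle are skipped instead of failing the request.
            boolean fieldNameContainsWildcards = true; // the request asked for "*"
            for (String field : Arrays.asList("text", "_uid")) {
                if (!highlighters.get("fvh").canHighlight(field) && fieldNameContainsWildcards) {
                    continue; // "_uid" is silently dropped from the highlight response
                }
                System.out.println("highlighting " + field + " with fvh");
            }
        }
    }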
@@ -63,7 +63,7 @@ public class FastVectorHighlighter implements Highlighter {
         FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
         FieldMapper mapper = highlighterContext.mapper;

-        if (!(mapper.fieldType().storeTermVectors() && mapper.fieldType().storeTermVectorOffsets() && mapper.fieldType().storeTermVectorPositions())) {
+        if (canHighlight(mapper) == false) {
             throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
         }
@@ -177,6 +177,11 @@ public class FastVectorHighlighter implements Highlighter {
         }
     }

+    @Override
+    public boolean canHighlight(FieldMapper fieldMapper) {
+        return fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions();
+    }
+
     private class MapperHighlightEntry {
         public FragListBuilder fragListBuilder;
         public FragmentsBuilder fragmentsBuilder;
@@ -21,7 +21,6 @@ package org.elasticsearch.search.highlight;

+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
-import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
@@ -45,6 +44,8 @@ import static com.google.common.collect.Maps.newHashMap;
  */
 public class HighlightPhase extends AbstractComponent implements FetchSubPhase {

+    private static final ImmutableList<String> STANDARD_HIGHLIGHTERS_BY_PRECEDENCE = ImmutableList.of("fvh", "postings", "plain");
+
     private final Highlighters highlighters;

     @Inject
@@ -91,6 +92,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
                 }
             }

+            boolean fieldNameContainsWildcards = field.field().contains("*");
             for (String fieldName : fieldNamesToHighlight) {
                 FieldMapper fieldMapper = getMapperForField(fieldName, context, hitContext);
                 if (fieldMapper == null) {
@@ -99,16 +101,14 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {

                 String highlighterType = field.fieldOptions().highlighterType();
                 if (highlighterType == null) {
-                    boolean useFastVectorHighlighter = fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions();
-                    if (useFastVectorHighlighter) {
-                        highlighterType = "fvh";
-                    } else if (fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
-                        highlighterType = "postings";
-                    } else {
-                        highlighterType = "plain";
-                    }
+                    for (String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) {
+                        if (highlighters.get(highlighterCandidate).canHighlight(fieldMapper)) {
+                            highlighterType = highlighterCandidate;
+                            break;
+                        }
+                    }
+                    assert highlighterType != null;
                 }

                 Highlighter highlighter = highlighters.get(highlighterType);
                 if (highlighter == null) {
                     throw new IllegalArgumentException("unknown highlighter type [" + highlighterType + "] for the field [" + fieldName + "]");
@@ -116,13 +116,17 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {

                 Query highlightQuery = field.fieldOptions().highlightQuery() == null ? context.parsedQuery().query() : field.fieldOptions().highlightQuery();
                 HighlighterContext highlighterContext = new HighlighterContext(fieldName, field, fieldMapper, context, hitContext, highlightQuery);

+                if ((highlighter.canHighlight(fieldMapper) == false) && fieldNameContainsWildcards) {
+                    // if several fieldnames matched the wildcard then we want to skip those that we cannot highlight
+                    continue;
+                }
                 HighlightField highlightField = highlighter.highlight(highlighterContext);
                 if (highlightField != null) {
                     highlightFields.put(highlightField.name(), highlightField);
                 }
             }
         }

         hitContext.hit().highlightFields(highlightFields);
     }
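To see why the wildcard check matters, consider the Kibana-style request exercised by the tests later in this diff (a sketch in the integration-test style used there; it assumes the test mapping in which only the "text" field can be highlighted):

    // "*" expands to every field, including meta fields that e.g. the fvh
    // cannot highlight because they lack term vectors.
    SearchResponse search = client().prepareSearch()
            .setQuery(constantScoreQuery(matchQuery("text", "text")))
            .addHighlightedField(new Field("*").highlighterType("fvh"))
            .get();
    // Before this change the request failed with IllegalArgumentException on the
    // first non-highlightable field; now such fields are skipped and only "text"
    // comes back in the highlight response.
    assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));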
@@ -18,6 +18,8 @@
  */
 package org.elasticsearch.search.highlight;

+import org.elasticsearch.index.mapper.FieldMapper;
+
 /**
  *
  */
@@ -26,4 +28,6 @@ public interface Highlighter {
     String[] names();

     HighlightField highlight(HighlighterContext highlighterContext);
+
+    boolean canHighlight(FieldMapper fieldMapper);
 }
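Every Highlighter implementation now has to declare up front whether it supports a given field. A minimal conforming implementation could look like the following; EchoHighlighter is hypothetical, made up for illustration (the CustomHighlighter test class further down is the real in-tree example):

    public class EchoHighlighter implements Highlighter {

        @Override
        public String[] names() {
            return new String[] { "echo" }; // hypothetical registration name
        }

        @Override
        public HighlightField highlight(HighlighterContext highlighterContext) {
            return null; // no-op body, for illustration only
        }

        @Override
        public boolean canHighlight(FieldMapper fieldMapper) {
            return true; // no index-time requirements, like PlainHighlighter
        }
    }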
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.search.highlight.*;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.common.text.StringText;
 import org.elasticsearch.common.text.Text;
@@ -117,7 +118,14 @@ public class PlainHighlighter implements Highlighter {
                 }
             }
         } catch (Exception e) {
-            throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+            if (e instanceof BytesRefHash.MaxBytesLengthExceededException) {
+                // this can happen if for example a field is not_analyzed and ignore_above option is set.
+                // the field will be ignored when indexing but the huge term is still in the source and
+                // the plain highlighter will parse the source and try to analyze it.
+                return null;
+            } else {
+                throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+            }
         }
         if (field.fieldOptions().scoreOrdered()) {
             CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
@@ -164,6 +172,11 @@ public class PlainHighlighter implements Highlighter {
         return null;
     }

+    @Override
+    public boolean canHighlight(FieldMapper fieldMapper) {
+        return true;
+    }
+
     private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
         try {
             if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
@@ -50,7 +50,7 @@ public class PostingsHighlighter implements Highlighter {

         FieldMapper fieldMapper = highlighterContext.mapper;
         SearchContextHighlight.Field field = highlighterContext.field;
-        if (fieldMapper.fieldType().indexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
+        if (canHighlight(fieldMapper) == false) {
             throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter");
         }

@@ -126,6 +126,11 @@ public class PostingsHighlighter implements Highlighter {
         return null;
     }

+    @Override
+    public boolean canHighlight(FieldMapper fieldMapper) {
+        return fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+    }
+
     private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
         //postings highlighter accepts all values in a single string, as offsets etc. need to match with content
         //loaded from stored fields, we merge all values using a proper separator
@@ -21,6 +21,7 @@ package org.elasticsearch.search.highlight;
 import com.google.common.collect.Lists;
 import org.elasticsearch.common.text.StringText;
 import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.mapper.FieldMapper;

 import java.util.List;
 import java.util.Locale;
@@ -68,6 +69,11 @@ public class CustomHighlighter implements Highlighter {
         return new HighlightField(highlighterContext.fieldName, responses.toArray(new Text[]{}));
     }

+    @Override
+    public boolean canHighlight(FieldMapper fieldMapper) {
+        return true;
+    }
+
     private static class CacheEntry {
         private int position;
         private int docId;
@@ -51,12 +51,124 @@ import static org.elasticsearch.index.query.QueryBuilders.*;
 import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
 import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
 import static org.elasticsearch.test.hamcrest.RegexMatcher.matches;
 import static org.hamcrest.Matchers.*;

 @Slow
 public class HighlighterSearchTests extends ElasticsearchIntegrationTest {

+    @Test
+    public void testHighlightingWithWildcardName() throws IOException {
+        // test the Kibana case with * as field name that will try to highlight all fields including meta fields
+        XContentBuilder mappings = jsonBuilder();
+        mappings.startObject();
+        mappings.startObject("type")
+                .startObject("properties")
+                .startObject("text")
+                .field("type", "string")
+                .field("analyzer", "keyword")
+                .field("index_options", "offsets")
+                .field("term_vector", "with_positions_offsets")
+                .endObject()
+                .endObject()
+                .endObject();
+        mappings.endObject();
+        assertAcked(prepareCreate("test")
+                .addMapping("type", mappings));
+        ensureYellow();
+        client().prepareIndex("test", "type", "1")
+                .setSource(jsonBuilder().startObject().field("text", "text").endObject())
+                .get();
+        refresh();
+        String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
+        SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
+        assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
+    }
+
+    @Test
+    public void testPlainHighlighterWithLongUnanalyzedStringTerm() throws IOException {
+        XContentBuilder mappings = jsonBuilder();
+        mappings.startObject();
+        mappings.startObject("type")
+                .startObject("properties")
+                .startObject("long_text")
+                .field("type", "string")
+                .field("analyzer", "keyword")
+                .field("index_options", "offsets")
+                .field("term_vector", "with_positions_offsets")
+                .field("ignore_above", 1)
+                .endObject()
+                .startObject("text")
+                .field("type", "string")
+                .field("analyzer", "keyword")
+                .field("index_options", "offsets")
+                .field("term_vector", "with_positions_offsets")
+                .endObject()
+                .endObject()
+                .endObject();
+        mappings.endObject();
+        assertAcked(prepareCreate("test")
+                .addMapping("type", mappings));
+        ensureYellow();
+        // create a term that is larger than the allowed 32766 bytes, index it and then try to highlight it
+        // the search request should still succeed
+        StringBuilder builder = new StringBuilder();
+        for (int i = 0; i < 32767; i++) {
+            builder.append('a');
+        }
+        client().prepareIndex("test", "type", "1")
+                .setSource(jsonBuilder().startObject().field("long_text", builder.toString()).field("text", "text").endObject())
+                .get();
+        refresh();
+        String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
+        SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
+        assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
+        search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("long_text").highlighterType(highlighter)).get();
+        assertNoFailures(search);
+        assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0));
+    }
+
+    @Test
+    public void testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOException {
+        XContentBuilder mappings = jsonBuilder();
+        mappings.startObject();
+        mappings.startObject("type")
+                .startObject("_source")
+                .field("enabled", false)
+                .endObject()
+                .startObject("properties")
+                .startObject("unstored_text")
+                .field("index_options", "offsets")
+                .field("term_vector", "with_positions_offsets")
+                .field("type", "string")
+                .field("store", "no")
+                .endObject()
+                .startObject("text")
+                .field("index_options", "offsets")
+                .field("term_vector", "with_positions_offsets")
+                .field("type", "string")
+                .field("store", "yes")
+                .endObject()
+                .endObject()
+                .endObject();
+        mappings.endObject();
+        assertAcked(prepareCreate("test")
+                .addMapping("type", mappings));
+        ensureYellow();
+        client().prepareIndex("test", "type", "1")
+                .setSource(jsonBuilder().startObject().field("unstored_text", "text").field("text", "text").endObject())
+                .get();
+        refresh();
+        String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
+        SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
+        assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
+        search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("unstored_text")).get();
+        assertNoFailures(search);
+        assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0));
+    }
+

     @Test
     // see #3486
     public void testHighTermFrequencyDoc() throws IOException {
@@ -1171,12 +1283,11 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
                 RestStatus.BAD_REQUEST,
                 containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter"));

-        assertFailures(client().prepareSearch()
+        //should not fail if there is a wildcard
+        assertNoFailures(client().prepareSearch()
                 .setQuery(matchPhraseQuery("title", "this is a test"))
                 .addHighlightedField("tit*", 50, 1, 10)
-                .setHighlighterType("fast-vector-highlighter"),
-                RestStatus.BAD_REQUEST,
-                containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter"));
+                .setHighlighterType("fast-vector-highlighter").get());
     }

     @Test
@@ -2169,12 +2280,11 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
                 RestStatus.BAD_REQUEST,
                 containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));

-        assertFailures(client().prepareSearch()
+        //should not fail if there is a wildcard
+        assertNoFailures(client().prepareSearch()
                 .setQuery(matchQuery("title", "this is a test"))
                 .addHighlightedField("tit*")
-                .setHighlighterType("postings"),
-                RestStatus.BAD_REQUEST,
-                containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));
+                .setHighlighterType("postings").get());
     }

     @Test