Merge pull request #11364 from brwe/highlighter-wildcard

Wildcard field names in highlighting should only return fields that can be highlighted
This commit is contained in:
Britta Weber 2015-05-27 17:11:50 +02:00
commit ceb0782ebd
7 changed files with 168 additions and 21 deletions

View File

@ -63,7 +63,7 @@ public class FastVectorHighlighter implements Highlighter {
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
FieldMapper mapper = highlighterContext.mapper;
if (!(mapper.fieldType().storeTermVectors() && mapper.fieldType().storeTermVectorOffsets() && mapper.fieldType().storeTermVectorPositions())) {
if (canHighlight(mapper) == false) {
throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
}
@ -177,6 +177,11 @@ public class FastVectorHighlighter implements Highlighter {
}
}
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
    // The fast vector highlighter can only run on fields whose term vectors were
    // indexed together with both positions and offsets; without term vectors there
    // is nothing to check further.
    if (fieldMapper.fieldType().storeTermVectors() == false) {
        return false;
    }
    return fieldMapper.fieldType().storeTermVectorPositions()
            && fieldMapper.fieldType().storeTermVectorOffsets();
}
private class MapperHighlightEntry {
public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder;

View File

@ -21,7 +21,6 @@ package org.elasticsearch.search.highlight;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
@ -45,6 +44,8 @@ import static com.google.common.collect.Maps.newHashMap;
*/
public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
private static final ImmutableList<String> STANDARD_HIGHLIGHTERS_BY_PRECEDENCE = ImmutableList.of("fvh", "postings", "plain");
private final Highlighters highlighters;
@Inject
@ -91,6 +92,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
}
}
boolean fieldNameContainsWildcards = field.field().contains("*");
for (String fieldName : fieldNamesToHighlight) {
FieldMapper fieldMapper = getMapperForField(fieldName, context, hitContext);
if (fieldMapper == null) {
@ -99,16 +101,14 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
String highlighterType = field.fieldOptions().highlighterType();
if (highlighterType == null) {
boolean useFastVectorHighlighter = fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions();
if (useFastVectorHighlighter) {
highlighterType = "fvh";
} else if (fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
highlighterType = "postings";
} else {
highlighterType = "plain";
for(String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) {
if (highlighters.get(highlighterCandidate).canHighlight(fieldMapper)) {
highlighterType = highlighterCandidate;
break;
}
}
assert highlighterType != null;
}
Highlighter highlighter = highlighters.get(highlighterType);
if (highlighter == null) {
throw new IllegalArgumentException("unknown highlighter type [" + highlighterType + "] for the field [" + fieldName + "]");
@ -116,13 +116,17 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
Query highlightQuery = field.fieldOptions().highlightQuery() == null ? context.parsedQuery().query() : field.fieldOptions().highlightQuery();
HighlighterContext highlighterContext = new HighlighterContext(fieldName, field, fieldMapper, context, hitContext, highlightQuery);
if ((highlighter.canHighlight(fieldMapper) == false) && fieldNameContainsWildcards) {
// if several field names matched the wildcard then we want to skip those that cannot be highlighted
continue;
}
HighlightField highlightField = highlighter.highlight(highlighterContext);
if (highlightField != null) {
highlightFields.put(highlightField.name(), highlightField);
}
}
}
hitContext.hit().highlightFields(highlightFields);
}

View File

@ -18,6 +18,8 @@
*/
package org.elasticsearch.search.highlight;
import org.elasticsearch.index.mapper.FieldMapper;
/**
*
*/
@ -26,4 +28,6 @@ public interface Highlighter {
String[] names();
HighlightField highlight(HighlighterContext highlighterContext);
boolean canHighlight(FieldMapper fieldMapper);
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
@ -117,7 +118,14 @@ public class PlainHighlighter implements Highlighter {
}
}
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
if (e instanceof BytesRefHash.MaxBytesLengthExceededException) {
// this can happen if, for example, a field is not_analyzed and the ignore_above option is set:
// the field is skipped at index time, but the huge term is still present in the source, and
// the plain highlighter will load the source and try to analyze it.
return null;
} else {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
}
if (field.fieldOptions().scoreOrdered()) {
CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
@ -164,6 +172,11 @@ public class PlainHighlighter implements Highlighter {
return null;
}
// The plain highlighter re-analyzes field values at highlight time, so it has no
// index-time requirements (no term vectors or offsets needed) and can be applied
// to any field.
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
return true;
}
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
try {
if (!tokenStream.hasAttribute(OffsetAttribute.class)) {

View File

@ -50,7 +50,7 @@ public class PostingsHighlighter implements Highlighter {
FieldMapper fieldMapper = highlighterContext.mapper;
SearchContextHighlight.Field field = highlighterContext.field;
if (fieldMapper.fieldType().indexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
if (canHighlight(fieldMapper) == false) {
throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter");
}
@ -126,6 +126,11 @@ public class PostingsHighlighter implements Highlighter {
return null;
}
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
    // The postings highlighter reads offsets straight from the postings list, so the
    // field must have been indexed with DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS.
    IndexOptions indexOptions = fieldMapper.fieldType().indexOptions();
    return indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
}
private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
//postings highlighter accepts all values in a single string, as offsets etc. need to match with content
//loaded from stored fields, we merge all values using a proper separator

View File

@ -21,6 +21,7 @@ package org.elasticsearch.search.highlight;
import com.google.common.collect.Lists;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.FieldMapper;
import java.util.List;
import java.util.Locale;
@ -68,6 +69,11 @@ public class CustomHighlighter implements Highlighter {
return new HighlightField(highlighterContext.fieldName, responses.toArray(new Text[]{}));
}
// Test-only highlighter: it fabricates its responses rather than reading index
// structures, so it can "highlight" any field regardless of how it was indexed.
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
return true;
}
private static class CacheEntry {
private int position;
private int docId;

View File

@ -51,12 +51,124 @@ import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.RegexMatcher.matches;
import static org.hamcrest.Matchers.*;
@Slow
public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
@Test
public void testHighlightingWithWildcardName() throws IOException {
// test the kibana case with * as fieldname that will try highlight all fields including meta fields
// "text" is mapped with both index_options=offsets and term vectors with positions
// and offsets, so every one of the three highlighters is able to handle it; the
// wildcard expansion must silently skip the fields (e.g. meta fields) that the
// chosen highlighter cannot highlight instead of failing the request.
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("properties")
.startObject("text")
.field("type", "string")
.field("analyzer", "keyword")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
ensureYellow();
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("text", "text").endObject())
.get();
refresh();
// any of the three standard highlighters must succeed on the "*" wildcard
String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
}
@Test
public void testPlainHighlighterWithLongUnanalyzedStringTerm() throws IOException {
// "long_text" uses ignore_above=1 so any realistic value is dropped at index time
// but remains in the _source, which the plain highlighter will try to re-analyze;
// "text" is a normal highlightable field used as a sanity check.
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("properties")
.startObject("long_text")
.field("type", "string")
.field("analyzer", "keyword")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.field("ignore_above", 1)
.endObject()
.startObject("text")
.field("type", "string")
.field("analyzer", "keyword")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
ensureYellow();
// create a term that is larger than the allowed 32766, index it and then try highlight on it
// the search request should still succeed
StringBuilder builder = new StringBuilder();
for (int i = 0; i < 32767; i++) {
builder.append('a');
}
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("long_text", builder.toString()).field("text", "text").endObject())
.get();
refresh();
String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
// highlighting the over-long field must not fail the request; it simply yields no
// highlight entries (the MaxBytesLengthExceededException is swallowed and null returned)
search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("long_text").highlighterType(highlighter)).get();
assertNoFailures(search);
assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0));
}
@Test
public void testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOException {
// _source is disabled and "unstored_text" is not stored: there is no way to fetch
// its value at highlight time, so highlighting it must silently yield nothing
// instead of failing the request; the stored "text" field must still highlight.
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("_source")
.field("enabled", false)
.endObject()
.startObject("properties")
// fix: this mapping previously declared "unstored_field" while the indexed
// document and the assertions below use "unstored_text", so the explicit mapping
// never applied and the field was dynamically mapped instead. The name now
// matches the field the test actually indexes and highlights.
.startObject("unstored_text")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.field("type", "string")
.field("store", "no")
.endObject()
.startObject("text")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.field("type", "string")
.field("store", "yes")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
ensureYellow();
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("unstored_text", "text").field("text", "text").endObject())
.get();
refresh();
String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
// the unstored, source-less field produces no highlight entry but no failure either
search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("unstored_text")).get();
assertNoFailures(search);
assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0));
}
@Test
// see #3486
public void testHighTermFrequencyDoc() throws IOException {
@ -1171,12 +1283,11 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter"));
assertFailures(client().prepareSearch()
//should not fail if there is a wildcard
assertNoFailures(client().prepareSearch()
.setQuery(matchPhraseQuery("title", "this is a test"))
.addHighlightedField("tit*", 50, 1, 10)
.setHighlighterType("fast-vector-highlighter"),
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter"));
.setHighlighterType("fast-vector-highlighter").get());
}
@Test
@ -2169,12 +2280,11 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));
assertFailures(client().prepareSearch()
//should not fail if there is a wildcard
assertNoFailures(client().prepareSearch()
.setQuery(matchQuery("title", "this is a test"))
.addHighlightedField("tit*")
.setHighlighterType("postings"),
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));
.setHighlighterType("postings").get());
}
@Test