highlighting: don't fail search request when name of highlighted field contains wildcards

When we highlight on fields using wildcards then fields might match that cannot
be highlighted by the specified highlighter. The whole search request then
failed. Instead, check that the field can be highlighted and ignore the field
if it can't.
In addition ignore the exception thrown by plain highlighter if a field conatins
terms larger than 32766.

closes #9881
This commit is contained in:
Britta Weber 2015-05-26 18:31:18 +02:00
parent 7451b4708e
commit 37610548f8
7 changed files with 168 additions and 21 deletions

View File

@ -63,7 +63,7 @@ public class FastVectorHighlighter implements Highlighter {
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
FieldMapper mapper = highlighterContext.mapper; FieldMapper mapper = highlighterContext.mapper;
if (!(mapper.fieldType().storeTermVectors() && mapper.fieldType().storeTermVectorOffsets() && mapper.fieldType().storeTermVectorPositions())) { if (canHighlight(mapper) == false) {
throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"); throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
} }
@ -177,6 +177,11 @@ public class FastVectorHighlighter implements Highlighter {
} }
} }
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
return fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions();
}
private class MapperHighlightEntry { private class MapperHighlightEntry {
public FragListBuilder fragListBuilder; public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder; public FragmentsBuilder fragmentsBuilder;

View File

@ -21,7 +21,6 @@ package org.elasticsearch.search.highlight;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
@ -45,6 +44,8 @@ import static com.google.common.collect.Maps.newHashMap;
*/ */
public class HighlightPhase extends AbstractComponent implements FetchSubPhase { public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
private static final ImmutableList<String> STANDARD_HIGHLIGHTERS_BY_PRECEDENCE = ImmutableList.of("fvh", "postings", "plain");
private final Highlighters highlighters; private final Highlighters highlighters;
@Inject @Inject
@ -91,6 +92,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
} }
} }
boolean fieldNameContainsWildcards = field.field().contains("*");
for (String fieldName : fieldNamesToHighlight) { for (String fieldName : fieldNamesToHighlight) {
FieldMapper fieldMapper = getMapperForField(fieldName, context, hitContext); FieldMapper fieldMapper = getMapperForField(fieldName, context, hitContext);
if (fieldMapper == null) { if (fieldMapper == null) {
@ -99,16 +101,14 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
String highlighterType = field.fieldOptions().highlighterType(); String highlighterType = field.fieldOptions().highlighterType();
if (highlighterType == null) { if (highlighterType == null) {
boolean useFastVectorHighlighter = fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions(); for(String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) {
if (useFastVectorHighlighter) { if (highlighters.get(highlighterCandidate).canHighlight(fieldMapper)) {
highlighterType = "fvh"; highlighterType = highlighterCandidate;
} else if (fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { break;
highlighterType = "postings";
} else {
highlighterType = "plain";
} }
} }
assert highlighterType != null;
}
Highlighter highlighter = highlighters.get(highlighterType); Highlighter highlighter = highlighters.get(highlighterType);
if (highlighter == null) { if (highlighter == null) {
throw new IllegalArgumentException("unknown highlighter type [" + highlighterType + "] for the field [" + fieldName + "]"); throw new IllegalArgumentException("unknown highlighter type [" + highlighterType + "] for the field [" + fieldName + "]");
@ -116,13 +116,17 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
Query highlightQuery = field.fieldOptions().highlightQuery() == null ? context.parsedQuery().query() : field.fieldOptions().highlightQuery(); Query highlightQuery = field.fieldOptions().highlightQuery() == null ? context.parsedQuery().query() : field.fieldOptions().highlightQuery();
HighlighterContext highlighterContext = new HighlighterContext(fieldName, field, fieldMapper, context, hitContext, highlightQuery); HighlighterContext highlighterContext = new HighlighterContext(fieldName, field, fieldMapper, context, hitContext, highlightQuery);
if ((highlighter.canHighlight(fieldMapper) == false) && fieldNameContainsWildcards) {
// if several fieldnames matched the wildcard then we want to skip those that we cannot highlight
continue;
}
HighlightField highlightField = highlighter.highlight(highlighterContext); HighlightField highlightField = highlighter.highlight(highlighterContext);
if (highlightField != null) { if (highlightField != null) {
highlightFields.put(highlightField.name(), highlightField); highlightFields.put(highlightField.name(), highlightField);
} }
} }
} }
hitContext.hit().highlightFields(highlightFields); hitContext.hit().highlightFields(highlightFields);
} }

View File

@ -18,6 +18,8 @@
*/ */
package org.elasticsearch.search.highlight; package org.elasticsearch.search.highlight;
import org.elasticsearch.index.mapper.FieldMapper;
/** /**
* *
*/ */
@ -26,4 +28,6 @@ public interface Highlighter {
String[] names(); String[] names();
HighlightField highlight(HighlighterContext highlighterContext); HighlightField highlight(HighlighterContext highlighterContext);
boolean canHighlight(FieldMapper fieldMapper);
} }

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.search.highlight.*; import org.apache.lucene.search.highlight.*;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.text.StringText; import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text; import org.elasticsearch.common.text.Text;
@ -117,8 +118,15 @@ public class PlainHighlighter implements Highlighter {
} }
} }
} catch (Exception e) { } catch (Exception e) {
if (e instanceof BytesRefHash.MaxBytesLengthExceededException) {
// this can happen if for example a field is not_analyzed and ignore_above option is set.
// the field will be ignored when indexing but the huge term is still in the source and
// the plain highlighter will parse the source and try to analyze it.
return null;
} else {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
} }
}
if (field.fieldOptions().scoreOrdered()) { if (field.fieldOptions().scoreOrdered()) {
CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() { CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
@Override @Override
@ -164,6 +172,11 @@ public class PlainHighlighter implements Highlighter {
return null; return null;
} }
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
return true;
}
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException { private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
try { try {
if (!tokenStream.hasAttribute(OffsetAttribute.class)) { if (!tokenStream.hasAttribute(OffsetAttribute.class)) {

View File

@ -50,7 +50,7 @@ public class PostingsHighlighter implements Highlighter {
FieldMapper fieldMapper = highlighterContext.mapper; FieldMapper fieldMapper = highlighterContext.mapper;
SearchContextHighlight.Field field = highlighterContext.field; SearchContextHighlight.Field field = highlighterContext.field;
if (fieldMapper.fieldType().indexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { if (canHighlight(fieldMapper) == false) {
throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter"); throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter");
} }
@ -126,6 +126,11 @@ public class PostingsHighlighter implements Highlighter {
return null; return null;
} }
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
return fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
}
private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) { private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
//postings highlighter accepts all values in a single string, as offsets etc. need to match with content //postings highlighter accepts all values in a single string, as offsets etc. need to match with content
//loaded from stored fields, we merge all values using a proper separator //loaded from stored fields, we merge all values using a proper separator

View File

@ -21,6 +21,7 @@ package org.elasticsearch.search.highlight;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import org.elasticsearch.common.text.StringText; import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text; import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.FieldMapper;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
@ -68,6 +69,11 @@ public class CustomHighlighter implements Highlighter {
return new HighlightField(highlighterContext.fieldName, responses.toArray(new Text[]{})); return new HighlightField(highlighterContext.fieldName, responses.toArray(new Text[]{}));
} }
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
return true;
}
private static class CacheEntry { private static class CacheEntry {
private int position; private int position;
private int docId; private int docId;

View File

@ -51,12 +51,124 @@ import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight; import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource; import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.RegexMatcher.matches; import static org.elasticsearch.test.hamcrest.RegexMatcher.matches;
import static org.hamcrest.Matchers.*; import static org.hamcrest.Matchers.*;
@Slow @Slow
public class HighlighterSearchTests extends ElasticsearchIntegrationTest { public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
@Test
public void testHighlightingWithWildcardName() throws IOException {
// test the kibana case with * as fieldname that will try highlight all fields including meta fields
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("properties")
.startObject("text")
.field("type", "string")
.field("analyzer", "keyword")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
ensureYellow();
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("text", "text").endObject())
.get();
refresh();
String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
}
@Test
public void testPlainHighlighterWithLongUnanalyzedStringTerm() throws IOException {
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("properties")
.startObject("long_text")
.field("type", "string")
.field("analyzer", "keyword")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.field("ignore_above", 1)
.endObject()
.startObject("text")
.field("type", "string")
.field("analyzer", "keyword")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
ensureYellow();
// crate a term that is larger than the allowed 32766, index it and then try highlight on it
// the search request should still succeed
StringBuilder builder = new StringBuilder();
for (int i = 0; i < 32767; i++) {
builder.append('a');
}
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("long_text", builder.toString()).field("text", "text").endObject())
.get();
refresh();
String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("long_text").highlighterType(highlighter)).get();
assertNoFailures(search);
assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0));
}
@Test
public void testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOException {
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("_source")
.field("enabled", false)
.endObject()
.startObject("properties")
.startObject("unstored_field")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.field("type", "string")
.field("store", "no")
.endObject()
.startObject("text")
.field("index_options", "offsets")
.field("term_vector", "with_positions_offsets")
.field("type", "string")
.field("store", "yes")
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
ensureYellow();
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("unstored_text", "text").field("text", "text").endObject())
.get();
refresh();
String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"});
SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get();
assertHighlight(search, 0, "text", 0, equalTo("<em>text</em>"));
search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("unstored_text")).get();
assertNoFailures(search);
assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0));
}
@Test @Test
// see #3486 // see #3486
public void testHighTermFrequencyDoc() throws IOException { public void testHighTermFrequencyDoc() throws IOException {
@ -1171,12 +1283,11 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
RestStatus.BAD_REQUEST, RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter")); containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter"));
assertFailures(client().prepareSearch() //should not fail if there is a wildcard
assertNoFailures(client().prepareSearch()
.setQuery(matchPhraseQuery("title", "this is a test")) .setQuery(matchPhraseQuery("title", "this is a test"))
.addHighlightedField("tit*", 50, 1, 10) .addHighlightedField("tit*", 50, 1, 10)
.setHighlighterType("fast-vector-highlighter"), .setHighlighterType("fast-vector-highlighter").get());
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter"));
} }
@Test @Test
@ -2169,12 +2280,11 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
RestStatus.BAD_REQUEST, RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter")); containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));
assertFailures(client().prepareSearch() //should not fail if there is a wildcard
assertNoFailures(client().prepareSearch()
.setQuery(matchQuery("title", "this is a test")) .setQuery(matchQuery("title", "this is a test"))
.addHighlightedField("tit*") .addHighlightedField("tit*")
.setHighlighterType("postings"), .setHighlighterType("postings").get());
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));
} }
@Test @Test