Highlighting can return excerpt with no highlights

You can configure the highlighting API to return an excerpt of a field
even if there wasn't a match on the field.

The FVH makes excerpts from the beginning of the string to the first
boundary character after the requested length or the boundary_max_scan,
whichever comes first.  The Plain highlighter makes excerpts from the
beginning of the string to the end of the last token before the requested
length.

Closes #1171
This commit is contained in:
Nik Everett 2013-09-03 14:25:58 -04:00 committed by Luca Cavanna
parent 919720ab4f
commit 14a709f563
9 changed files with 330 additions and 19 deletions

View File

@ -164,6 +164,28 @@ is required. Note that `fragment_size` is ignored in this case.
When using `fast-vector-highlighter` one can use the `fragment_offset`
parameter to control the margin to start highlighting from.
coming[0.90.6]
It is also possible to ask Elasticsearch to return a fragment from the
beginning of the field in the case where there are no matches by setting
`no_match_size` to something greater than 0. The default is 0.
[source,js]
--------------------------------------------------
{
"query" : {...},
"highlight" : {
"fields" : {
"content" : {
"fragment_size" : 150,
"number_of_fragments" : 3,
"no_match_size": 150
}
}
}
}
--------------------------------------------------
==== Highlight query
It is also possible to highlight against a query other than the search

View File

@ -692,6 +692,17 @@ public class SearchRequestBuilder extends ActionRequestBuilder<SearchRequest, Se
return this;
}
/**
* Sets the global size of the fragment to return from the beginning of the field if there are no
* matches to highlight. The value is used for every highlighted field that doesn't also define its
* own noMatchSize. The call is forwarded to the underlying highlight builder.
* @param noMatchSize integer to set or null to leave out of request. default is null.
* @return this builder for chaining
*/
public SearchRequestBuilder setHighlighterNoMatchSize(Integer noMatchSize) {
highlightBuilder().noMatchSize(noMatchSize);
return this;
}
public SearchRequestBuilder setHighlighterOptions(Map<String, Object> options) {
highlightBuilder().options(options);
return this;

View File

@ -23,6 +23,7 @@ import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.vectorhighlight.*;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
@ -35,6 +36,7 @@ import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragme
import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
import org.elasticsearch.search.internal.SearchContext;
import java.util.Collections;
import java.util.Map;
/**
@ -150,11 +152,21 @@ public class FastVectorHighlighter implements Highlighter {
if (fragments != null && fragments.length > 0) {
return new HighlightField(field.field(), StringText.convertFromStringArray(fragments));
}
int noMatchSize = highlighterContext.field.noMatchSize();
if (noMatchSize > 0) {
// Essentially we just request that a fragment is built from 0 to noMatchSize using the normal fragmentsBuilder
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList());
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.names().indexName(),
fieldFragList, 1, field.preTags(), field.postTags(), encoder);
if (fragments != null && fragments.length > 0) {
return new HighlightField(field.field(), StringText.convertFromStringArray(fragments));
}
}
return null;
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
return null;
}
private class MapperHighlightEntry {

View File

@ -56,6 +56,8 @@ public class HighlightBuilder implements ToXContent {
private QueryBuilder highlightQuery;
private Integer noMatchSize;
private Map<String, Object> options;
/**
@ -212,6 +214,17 @@ public class HighlightBuilder implements ToXContent {
return this;
}
/**
* Sets the global size of the fragment to return from the beginning of the field if there are no
* matches to highlight and the field doesn't also define noMatchSize. When non-null the value is
* written to the request as {@code no_match_size}; when null the key is omitted.
* @param noMatchSize integer to set or null to leave out of request. default is null.
* @return this for chaining
*/
public HighlightBuilder noMatchSize(Integer noMatchSize) {
this.noMatchSize = noMatchSize;
return this;
}
/**
* Allows to set custom options for custom highlighters.
*/
@ -250,6 +263,9 @@ public class HighlightBuilder implements ToXContent {
if (highlightQuery != null) {
builder.field("highlight_query", highlightQuery);
}
if (noMatchSize != null) {
builder.field("no_match_size", noMatchSize);
}
if (options != null && options.size() > 0) {
builder.field("options", options);
}
@ -296,6 +312,9 @@ public class HighlightBuilder implements ToXContent {
if (field.highlightQuery != null) {
builder.field("highlight_query", field.highlightQuery);
}
if (field.noMatchSize != null) {
builder.field("no_match_size", field.noMatchSize);
}
if (field.options != null && field.options.size() > 0) {
builder.field("options", field.options);
}
@ -324,6 +343,7 @@ public class HighlightBuilder implements ToXContent {
String highlighterType;
String fragmenter;
QueryBuilder highlightQuery;
Integer noMatchSize;
Map<String, Object> options;
public Field(String name) {
@ -426,6 +446,17 @@ public class HighlightBuilder implements ToXContent {
return this;
}
/**
* Sets, for this field only, the size of the fragment to return from the beginning of the field if
* there are no matches to highlight. When non-null the value is written into this field's section of
* the request as {@code no_match_size}; when null the key is omitted.
* @param noMatchSize integer to set or null to leave out of request. default is null.
* @return this for chaining
*/
public Field noMatchSize(Integer noMatchSize) {
this.noMatchSize = noMatchSize;
return this;
}
/**
* Allows to set custom options for custom highlighters.
* This overrides global settings set by {@link HighlightBuilder#options(Map<String, Object>)}.

View File

@ -82,6 +82,7 @@ public class HighlighterParseElement implements SearchParseElement {
String globalFragmenter = null;
Map<String, Object> globalOptions = null;
Query globalHighlightQuery = null;
int globalNoMatchSize = 0;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
@ -131,6 +132,8 @@ public class HighlighterParseElement implements SearchParseElement {
globalHighlighterType = parser.text();
} else if ("fragmenter".equals(topLevelFieldName)) {
globalFragmenter = parser.text();
} else if ("no_match_size".equals(topLevelFieldName) || "noMatchSize".equals(topLevelFieldName)) {
globalNoMatchSize = parser.intValue();
}
} else if (token == XContentParser.Token.START_OBJECT && "options".equals(topLevelFieldName)) {
globalOptions = parser.map();
@ -186,6 +189,8 @@ public class HighlighterParseElement implements SearchParseElement {
field.highlighterType(parser.text());
} else if ("fragmenter".equals(fieldName)) {
field.fragmenter(parser.text());
} else if ("no_match_size".equals(fieldName) || "noMatchSize".equals(fieldName)) {
field.noMatchSize(parser.intValue());
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("highlight_query".equals(fieldName) || "highlightQuery".equals(fieldName)) {
@ -251,6 +256,9 @@ public class HighlighterParseElement implements SearchParseElement {
if (field.highlightQuery() == null && globalHighlightQuery != null) {
field.highlightQuery(globalHighlightQuery);
}
if (field.noMatchSize() == -1) {
field.noMatchSize(globalNoMatchSize);
}
}
context.highlight(new SearchContextHighlight(fields));

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.highlight.*;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@ -37,6 +38,7 @@ import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
@ -164,10 +166,51 @@ public class PlainHighlighter implements Highlighter {
if (fragments != null && fragments.length > 0) {
return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));
}
int noMatchSize = highlighterContext.field.noMatchSize();
if (noMatchSize > 0 && textsToHighlight.size() >= 1) {
// Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
String fieldContents = textsToHighlight.get(0).toString();
Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
int end;
try {
end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer.tokenStream(mapper.names().indexName(), fieldContents));
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
if (end > 0) {
return new HighlightField(highlighterContext.fieldName, new Text[] { new StringText(fieldContents.substring(0, end)) });
}
}
return null;
}
/**
 * Finds the offset at which a "no match" excerpt should be cut so that it ends on a token boundary.
 * <p>
 * The stream is consumed until a token ends at or after {@code noMatchSize}. The result is the end
 * offset of the last token that fits entirely within {@code noMatchSize}, or of the last token of
 * the stream if the whole stream fits. The stream is always ended and closed before returning.
 *
 * @param noMatchSize maximum length of the excerpt
 * @param tokenStream token stream over the field contents; consumed and closed by this method
 * @return the end offset to cut the field contents at, or -1 if the stream carries no offsets or
 *         no token fits within {@code noMatchSize}
 * @throws IOException if the token stream fails while being consumed, ended or closed
 */
private int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
    try {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        // Look the attribute up once instead of once per token.
        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            int tokenEnd = offsets.endOffset();
            if (tokenEnd >= noMatchSize) {
                // Jump to the end of this token only if it lands exactly on the boundary,
                // otherwise stop at the end of the previous token.
                return tokenEnd == noMatchSize ? noMatchSize : end;
            }
            end = tokenEnd;
        }
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    } finally {
        // Make sure the stream is closed even if end() throws, otherwise it is leaked.
        try {
            tokenStream.end();
        } finally {
            tokenStream.close();
        }
    }
}
private static class Encoders {
public static Encoder DEFAULT = new DefaultEncoder();
public static Encoder HTML = new SimpleHTMLEncoder();

View File

@ -40,7 +40,7 @@ public class SearchContextHighlight {
}
public static class Field {
// Fields that default to null or -1 are often set to their real default in HighlighterParseElement#parse
private final String field;
private int fragmentCharSize = -1;
@ -66,10 +66,13 @@ public class SearchContextHighlight {
private String fragmenter;
private int boundaryMaxScan = -1;
private Character[] boundaryChars = null;
private Query highlightQuery;
private int noMatchSize = -1;
private Map<String, Object> options;
public Field(String field) {
@ -192,6 +195,14 @@ public class SearchContextHighlight {
this.highlightQuery = highlightQuery;
}
/**
* Returns the size of the fragment to take from the beginning of the field when nothing matched.
* The field starts out at -1, meaning "not set on this field"; HighlighterParseElement replaces
* that with the global value.
*/
public int noMatchSize() {
return noMatchSize;
}
/**
* Sets the size of the fragment to take from the beginning of the field when nothing matched.
* The highlighters only build such a fragment when the value is greater than 0.
*/
public void noMatchSize(int noMatchSize) {
this.noMatchSize = noMatchSize;
}
public Map<String, Object> options() {
return options;
}

View File

@ -50,8 +50,7 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.Matchers.*;
/**
@ -1602,17 +1601,8 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
@Test
public void testHighlightUsesHighlightQuery() throws IOException {
prepareCreate("test")
.addMapping("type1", jsonBuilder().startObject()
.startObject("type")
.startObject("properties")
.startObject("text")
.field("type", "string")
.field("store", "yes")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject()
.endObject()).get();
.addMapping("type1", "text", "type=string,store=yes,term_vector=with_positions_offsets")
.get();
ensureGreen();
index("test", "type1", "1", "text", "some stuff stuff stuff stuff stuff to highlight against the stuff phrase");
@ -1621,7 +1611,7 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
// Make sure the fvh doesn't highlight in the same way as we're going to do with a scoreQuery because
// that would invalidate the test results.
Matcher<String> highlightedMatcher = anyOf(
containsString("<em>stuff phrase</em>"), // FHV normally does this
containsString("<em>stuff phrase</em>"), //t FHV normally does this
containsString("<em>stuff</em> <em>phrase</em>")); // Plain normally does this
HighlightBuilder.Field field = new HighlightBuilder.Field("text")
.fragmentSize(20)
@ -1662,4 +1652,181 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
assertHighlight(response, 0, "text", 0, highlightedMatcher);
// Note that the plain highlighter doesn't join the highlighted elements for us
}
/**
 * Checks the {@code no_match_size} option on a single valued field with both the plain highlighter
 * and the fvh: unset and 0 return nothing, a size inside the text returns an excerpt cut on a
 * boundary, a size at or past the end of the text returns the whole text, and the option can be
 * set globally as well as per field.
 */
@Test
public void testHighlightNoMatchSize() throws IOException {
    prepareCreate("test")
            .addMapping("type1", "text", "type=string,store=yes,term_vector=with_positions_offsets")
            .get();
    ensureGreen();

    String text = "I am pretty long so some of me should get cut off";
    index("test", "type1", "1", "text", text);
    refresh();

    // When you don't set noMatchSize you don't get any results if there isn't anything to highlight.
    HighlightBuilder.Field field = new HighlightBuilder.Field("text")
            .fragmentSize(21)
            .numOfFragments(1)
            .highlighterType("plain");
    SearchResponse response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    // When noMatchSize is set to 0 you also shouldn't get any
    field.highlighterType("plain").noMatchSize(0);
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    // When noMatchSize is between 0 and the size of the string
    field.highlighterType("plain").noMatchSize(21);
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo("I am pretty long so"));

    // The FVH also works but the fragment is longer than the plain highlighter because of boundary_max_scan
    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo("I am pretty long so some"));

    // We can also ask for a fragment longer than the input string and get the whole string
    field.highlighterType("plain").noMatchSize(text.length() * 2);
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo(text));

    // Same for the fvh
    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo(text));

    // We can also ask for a fragment exactly the size of the input field and get the whole field
    field.highlighterType("plain").noMatchSize(text.length());
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo(text));

    // Same for the fvh
    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo(text));

    // You can set noMatchSize globally in the highlighter as well
    field.highlighterType("plain").noMatchSize(null);
    response = client().prepareSearch("test").setHighlighterNoMatchSize(21).addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo("I am pretty long so"));

    // Same for the fvh
    field.highlighterType("fvh");
    response = client().prepareSearch("test").setHighlighterNoMatchSize(21).addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo("I am pretty long so some"));
}
/**
 * Checks the {@code no_match_size} option on multi-valued, empty, missing and unmapped fields with
 * both the plain highlighter and the fvh: the excerpt comes from the first value only, and nothing
 * is returned when that value is empty or the field has no value at all.
 */
@Test
public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException {
    prepareCreate("test")
            .addMapping("type1", jsonBuilder()
                    .startObject()
                        .startObject("type1")
                            .startObject("properties")
                                .startObject("text")
                                    .field("type", "string")
                                    .field("store", "yes")
                                    .field("term_vector", "with_positions_offsets")
                                .endObject()
                            .endObject()
                        .endObject()
                    .endObject())
            .get();
    ensureGreen();

    String text1 = "I am pretty long so some of me should get cut off";
    String text2 = "I am short";
    index("test", "type1", "1", "text", new String[] {text1, text2});
    refresh();

    // The no match fragment should come from the first field of a multi-valued field
    HighlightBuilder.Field field = new HighlightBuilder.Field("text")
            .fragmentSize(21)
            .numOfFragments(1)
            .highlighterType("plain")
            .noMatchSize(21);
    SearchResponse response = client().prepareSearch("test").addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo("I am pretty long so"));

    // And the fvh should work as well
    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertHighlight(response, 0, "text", 0, equalTo("I am pretty long so some"));

    // And noMatchSize returns nothing when the first entry is empty string!
    // The delete must not live inside a Java assert statement: with assertions disabled it would never run.
    assertThat("document should exist before it is deleted",
            client().prepareDelete("test", "type1", "1").get().isNotFound(), equalTo(false));
    index("test", "type1", "1", "text", new String[] {"", text2});
    refresh();
    field.highlighterType("plain");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    // And the fvh should work as well
    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    // But if the field was actually empty then you should get no highlighting field
    assertThat("document should exist before it is deleted",
            client().prepareDelete("test", "type1", "1").get().isNotFound(), equalTo(false));
    index("test", "type1", "1", "text", new String[] {});
    refresh();
    field.highlighterType("plain");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    // And the fvh should work as well
    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    // Same for if the field doesn't even exist
    assertThat("document should exist before it is deleted",
            client().prepareDelete("test", "type1", "1").get().isNotFound(), equalTo(false));
    index("test", "type1", "1");
    refresh();
    field.highlighterType("plain");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    // And the fvh should work as well
    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "text");

    // Again same if the field isn't mapped
    field = new HighlightBuilder.Field("unmapped")
            .highlighterType("plain")
            .noMatchSize(21);
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "unmapped");

    // And the fvh should work as well
    field.highlighterType("fvh");
    response = client().prepareSearch("test").addHighlightedField(field).get();
    assertNotHighlighted(response, 0, "unmapped");
}
}

View File

@ -189,12 +189,18 @@ public class ElasticsearchAssertions {
/**
* Asserts that the given hit of the response has a highlighted fragment for the field that
* satisfies the matcher.
* <p>
* Checks, in order: the response has no failures, there are more than {@code hit} hits, the hit has
* a non-null highlight entry for {@code field}, that entry has more than {@code fragment}
* fragments, and the selected fragment's string satisfies {@code matcher}. Finally the response is
* passed to {@code assertVersionSerializable}.
*
* @param resp the search response to inspect
* @param hit zero-based index of the hit to check
* @param field name of the highlighted field
* @param fragment zero-based index of the fragment to check
* @param matcher matcher applied to the fragment's string
*/
public static void assertHighlight(SearchResponse resp, int hit, String field, int fragment, Matcher<String> matcher) {
assertNoFailures(resp);
assertThat("not enough hits", resp.getHits().hits().length, greaterThan(hit));
assertThat(resp.getHits().hits()[hit].getHighlightFields().get(field), notNullValue());
assertThat(resp.getHits().hits()[hit].getHighlightFields(), hasKey(field));
assertThat(resp.getHits().hits()[hit].getHighlightFields().get(field).fragments().length, greaterThan(fragment));
assertThat(resp.getHits().hits()[hit].highlightFields().get(field).fragments()[fragment].string(), matcher);
assertVersionSerializable(resp);
}
/**
* Asserts that the given hit of the response carries no highlight entry for the field.
* <p>
* Checks, in order: the response has no failures, there are more than {@code hit} hits, and the
* hit's highlight fields do not contain {@code field} as a key.
*
* @param resp the search response to inspect
* @param hit zero-based index of the hit to check
* @param field name of the field that must not be highlighted
*/
public static void assertNotHighlighted(SearchResponse resp, int hit, String field) {
assertNoFailures(resp);
assertThat("not enough hits", resp.getHits().hits().length, greaterThan(hit));
assertThat(resp.getHits().hits()[hit].getHighlightFields(), not(hasKey(field)));
}
public static void assertSuggestionSize(Suggest searchSuggest, int entry, int size, String key) {
assertThat(searchSuggest, notNullValue());
assertThat(searchSuggest.size(), greaterThanOrEqualTo(1));