Expose fragmenter option for plain / normal highlighter.

Closes #2465
Martijn van Groningen 2012-12-06 14:59:42 +01:00
parent c2f8ee105b
commit f72d5c1907
5 changed files with 109 additions and 6 deletions
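
For orientation before the per-file diffs: the new `fragmenter` option is set per highlighted field (or globally for all fields) and only applies to the plain / normal highlighter. A minimal client-side sketch, mirroring the test added at the bottom of this commit; it assumes a connected `Client` named `client` plus the `test` index and string field `tags` that the test creates:

// Request the plain highlighter's "simple" fragmenter for the "tags" field.
// Accepted values are "simple" and "span"; "span" matches the previously hard-coded behaviour.
SearchResponse response = client.prepareSearch("test")
        .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
        .addHighlightedField(new HighlightBuilder.Field("tags")
                .fragmentSize(-1).numOfFragments(2).fragmenter("simple"))
        .execute().actionGet();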

HighlightBuilder.java

@@ -50,6 +50,7 @@ public class HighlightBuilder implements ToXContent {
private String highlighterType;
private String fragmenter;
/**
* Adds a field to be highlighted with default fragment size of 100 characters, and
@@ -188,6 +189,15 @@ public class HighlightBuilder implements ToXContent {
return this;
}
/**
* Sets what fragmenter to use to break up text that is eligible for highlighting.
* This option is only applicable when using plain / normal highlighter.
*/
public HighlightBuilder fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
return this;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject("highlight");
@@ -212,6 +222,9 @@ public class HighlightBuilder implements ToXContent {
if (highlighterType != null) {
builder.field("type", highlighterType);
}
if (fragmenter != null) {
builder.field("fragmenter", fragmenter);
}
if (fields != null) {
builder.startObject("fields");
for (Field field : fields) {
@@ -231,6 +244,9 @@ public class HighlightBuilder implements ToXContent {
if (field.highlighterType != null) {
builder.field("type", field.highlighterType);
}
if (field.fragmenter != null) {
builder.field("fragmenter", field.fragmenter);
}
builder.endObject();
}
@@ -248,6 +264,7 @@ public class HighlightBuilder implements ToXContent {
int numOfFragments = -1;
Boolean requireFieldMatch;
String highlighterType;
String fragmenter;
public Field(String name) {
this.name = name;
@@ -281,5 +298,10 @@ public class HighlightBuilder implements ToXContent {
this.highlighterType = highlighterType;
return this;
}
public Field fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
return this;
}
}
}
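
As a rough illustration of what the builder additions above put on the wire, the sketch below serializes a HighlightBuilder with the new option set globally. It is not part of the commit and assumes the builder methods chain (return this) and the usual XContentFactory / ToXContent.EMPTY_PARAMS helpers from this codebase; IOException handling is omitted:

// Sketch (not from the commit): what HighlightBuilder#toXContent emits once fragmenter is set.
XContentBuilder out = XContentFactory.jsonBuilder().startObject();
new HighlightBuilder()
        .field("tags")            // field to highlight, default fragment settings
        .fragmenter("simple")     // the new global option
        .toXContent(out, ToXContent.EMPTY_PARAMS);
out.endObject();
// out.string() should look roughly like:
// {"highlight":{"fragmenter":"simple","fields":{"tags":{}}}}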

HighlightPhase.java

@@ -30,6 +30,7 @@ import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.vectorhighlight.*;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FastStringReader;
@@ -131,13 +132,13 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
useFastVectorHighlighter = mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions();
} else if (field.highlighterType().equals("fast-vector-highlighter") || field.highlighterType().equals("fvh")) {
if (!(mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions())) {
throw new FetchPhaseExecutionException(context, "the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
throw new ElasticSearchIllegalArgumentException("the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
}
useFastVectorHighlighter = true;
} else if (field.highlighterType().equals("highlighter") || field.highlighterType().equals("plain")) {
useFastVectorHighlighter = false;
} else {
throw new FetchPhaseExecutionException(context, "unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]");
throw new ElasticSearchIllegalArgumentException("unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]");
}
if (!useFastVectorHighlighter) {
MapperHighlightEntry entry = cache.mappers.get(mapper);
@@ -151,8 +152,14 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
Fragmenter fragmenter;
if (field.numberOfFragments() == 0) {
fragmenter = new NullFragmenter();
} else {
} else if (field.fragmenter() == null) {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
} else if ("simple".equals(field.fragmenter())) {
fragmenter = new SimpleFragmenter(field.fragmentCharSize());
} else if ("span".equals(field.fragmenter())) {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
} else {
throw new ElasticSearchIllegalArgumentException("unknown fragmenter option [" + field.fragmenter() + "] for the field [" + field.field() + "]");
}
Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]);
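
The branch above maps the option onto Lucene's own fragmenters: "simple" selects SimpleFragmenter, which cuts fragments purely by character count, while "span" (also the default when no option is given) keeps the existing SimpleSpanFragmenter, which sizes fragments around the matching query spans. A standalone Lucene sketch of how either choice plugs into the plain Highlighter; the query, analyzer and text are placeholders rather than anything taken from the commit:

// Standalone sketch (not from the commit): wiring a Fragmenter into Lucene's plain Highlighter,
// much as HighlightPhase does above. The Analyzer instance is assumed to exist and checked
// exceptions (IOException, InvalidTokenOffsetsException) are not shown.
Query query = new TermQuery(new Term("tags", "tag"));              // placeholder query
QueryScorer queryScorer = new QueryScorer(query, "tags");
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<em>", "</em>"), queryScorer);
highlighter.setTextFragmenter(new SimpleFragmenter(100));          // "simple": fixed-size fragments
// highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer, 100));  // "span": span-aware
// highlighter.setTextFragmenter(new NullFragmenter());            // number_of_fragments == 0
String[] fragments = highlighter.getBestFragments(
        analyzer, "tags", "this is a really long tag i would like to highlight", 2);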

HighlighterParseElement.java

@@ -77,6 +77,7 @@ public class HighlighterParseElement implements SearchParseElement {
int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN;
char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS;
String globalHighlighterType = null;
String globalFragmenter = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
@@ -120,6 +121,8 @@ public class HighlighterParseElement implements SearchParseElement {
globalBoundaryChars = parser.text().toCharArray();
} else if ("type".equals(topLevelFieldName)) {
globalHighlighterType = parser.text();
} else if ("fragmenter".equals(topLevelFieldName)) {
globalFragmenter = parser.text();
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("fields".equals(topLevelFieldName)) {
@@ -166,6 +169,8 @@ public class HighlighterParseElement implements SearchParseElement {
field.boundaryChars(parser.text().toCharArray());
} else if ("type".equals(fieldName)) {
field.highlighterType(parser.text());
} else if ("fragmenter".equals(fieldName)) {
field.fragmenter(parser.text());
}
}
}
@@ -214,6 +219,9 @@ public class HighlighterParseElement implements SearchParseElement {
if (field.highlighterType() == null) {
field.highlighterType(globalHighlighterType);
}
if (field.fragmenter() == null) {
field.fragmenter(globalFragmenter);
}
}
context.highlight(new SearchContextHighlight(fields));
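
Putting the parser changes together: "fragmenter" is now accepted both at the top level of the highlight section (globalFragmenter) and inside an individual field, with the per-field value taking precedence. A hedged sketch of a request body this parser would accept, built with the codebase's own jsonBuilder; "title" and "tags" are placeholder field names:

// Sketch (not from the commit): a highlight section the parser above understands.
XContentBuilder body = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("highlight")
                .field("fragmenter", "simple")            // global default, parsed as globalFragmenter
                .startObject("fields")
                    .startObject("title").endObject()     // inherits "simple"
                    .startObject("tags")
                        .field("fragmenter", "span")      // per-field value overrides the global one
                    .endObject()
                .endObject()
            .endObject()
        .endObject();

Any other value survives parsing but is rejected at fetch time by the ElasticSearchIllegalArgumentException added in HighlightPhase, which the new test below asserts surfaces as a BAD_REQUEST shard failure.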

SearchContextHighlight.java

@@ -60,6 +60,8 @@ public class SearchContextHighlight {
private String highlighterType;
private String fragmenter;
private int boundaryMaxScan = -1;
private char[] boundaryChars = null;
@@ -151,6 +153,14 @@ public class SearchContextHighlight {
this.highlighterType = type;
}
public String fragmenter() {
return fragmenter;
}
public void fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
}
public int boundaryMaxScan() {
return boundaryMaxScan;
}

HighlighterSearchTests.java

@@ -20,14 +20,17 @@
package org.elasticsearch.test.integration.search.highlight;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder;
@@ -49,6 +52,7 @@ import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.testng.Assert.fail;
/**
*
@@ -915,8 +919,60 @@ public class HighlighterSearchTests extends AbstractNodesTests {
.addHighlightedField("tags", -1, 0)
.execute().actionGet();
assertThat(2, equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments().length));
assertThat("this is a really long <em>tag</em> i would like to highlight", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string()));
assertThat("here is another one that is very long and has the <em>tag</em> token near the end", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string()));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really long <em>tag</em> i would like to highlight"));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long and has the <em>tag</em> token near the end"));
}
@Test
public void testPlainHighlightDifferentFragmenter() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();
} catch (Exception e) {
// ignore
}
client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder()
.put("number_of_shards", 1).put("number_of_replicas", 0))
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("tags").field("type", "string").endObject()
.endObject().endObject().endObject())
.execute().actionGet();
client.prepareIndex("test", "type1", "1")
.setSource(jsonBuilder().startObject().field("tags",
"this is a really long tag i would like to highlight",
"here is another one that is very long tag and has the tag token near the end").endObject())
.setRefresh(true).execute().actionGet();
SearchResponse response = client.prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
.addHighlightedField(new HighlightBuilder.Field("tags")
.fragmentSize(-1).numOfFragments(2).fragmenter("simple"))
.execute().actionGet();
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));
response = client.prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
.addHighlightedField(new HighlightBuilder.Field("tags")
.fragmentSize(-1).numOfFragments(2).fragmenter("span"))
.execute().actionGet();
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));
try {
client.prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
.addHighlightedField(new HighlightBuilder.Field("tags")
.fragmentSize(-1).numOfFragments(2).fragmenter("invalid"))
.execute().actionGet();
fail("Shouldn't get here");
} catch (SearchPhaseExecutionException e) {
assertThat(e.shardFailures()[0].status(), equalTo(RestStatus.BAD_REQUEST));
}
}
}