Expose fragmenter option for plain / normal highlighter.

Closes #2465
This commit is contained in:
Martijn van Groningen 2012-12-06 14:59:42 +01:00
parent c2f8ee105b
commit f72d5c1907
5 changed files with 109 additions and 6 deletions

View File

@ -50,6 +50,7 @@ public class HighlightBuilder implements ToXContent {
private String highlighterType; private String highlighterType;
private String fragmenter;
/** /**
* Adds a field to be highlighted with default fragment size of 100 characters, and * Adds a field to be highlighted with default fragment size of 100 characters, and
@ -188,6 +189,15 @@ public class HighlightBuilder implements ToXContent {
return this; return this;
} }
/**
* Sets the fragmenter used to break up text that is eligible for highlighting.
* This option is only applicable when using the plain / normal highlighter.
* <p>
* The value set here is written at the top level of the highlight section and
* is used for every field that does not specify its own fragmenter. When it is
* left <tt>null</tt>, no <tt>fragmenter</tt> entry is written to the request.
* The highlight phase recognizes the values <tt>simple</tt> and <tt>span</tt>.
*
* @param fragmenter the name of the fragmenter to request
* @return this builder, so that calls can be chained
*/
public HighlightBuilder fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
return this;
}
@Override @Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject("highlight"); builder.startObject("highlight");
@ -212,6 +222,9 @@ public class HighlightBuilder implements ToXContent {
if (highlighterType != null) { if (highlighterType != null) {
builder.field("type", highlighterType); builder.field("type", highlighterType);
} }
if (fragmenter != null) {
builder.field("fragmenter", fragmenter);
}
if (fields != null) { if (fields != null) {
builder.startObject("fields"); builder.startObject("fields");
for (Field field : fields) { for (Field field : fields) {
@ -231,6 +244,9 @@ public class HighlightBuilder implements ToXContent {
if (field.highlighterType != null) { if (field.highlighterType != null) {
builder.field("type", field.highlighterType); builder.field("type", field.highlighterType);
} }
if (field.fragmenter != null) {
builder.field("fragmenter", field.fragmenter);
}
builder.endObject(); builder.endObject();
} }
@ -248,6 +264,7 @@ public class HighlightBuilder implements ToXContent {
int numOfFragments = -1; int numOfFragments = -1;
Boolean requireFieldMatch; Boolean requireFieldMatch;
String highlighterType; String highlighterType;
String fragmenter;
public Field(String name) { public Field(String name) {
this.name = name; this.name = name;
@ -281,5 +298,10 @@ public class HighlightBuilder implements ToXContent {
this.highlighterType = highlighterType; this.highlighterType = highlighterType;
return this; return this;
} }
/**
* Sets the fragmenter to use for this field only. When left <tt>null</tt>,
* no <tt>fragmenter</tt> entry is written for this field.
*
* @param fragmenter the name of the fragmenter to request for this field
* @return this field, so that calls can be chained
*/
public Field fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
return this;
}
} }
} }

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.vectorhighlight.*; import org.apache.lucene.search.vectorhighlight.*;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.io.FastStringReader;
@ -131,13 +132,13 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
useFastVectorHighlighter = mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions(); useFastVectorHighlighter = mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions();
} else if (field.highlighterType().equals("fast-vector-highlighter") || field.highlighterType().equals("fvh")) { } else if (field.highlighterType().equals("fast-vector-highlighter") || field.highlighterType().equals("fvh")) {
if (!(mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions())) { if (!(mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions())) {
throw new FetchPhaseExecutionException(context, "the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"); throw new ElasticSearchIllegalArgumentException("the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
} }
useFastVectorHighlighter = true; useFastVectorHighlighter = true;
} else if (field.highlighterType().equals("highlighter") || field.highlighterType().equals("plain")) { } else if (field.highlighterType().equals("highlighter") || field.highlighterType().equals("plain")) {
useFastVectorHighlighter = false; useFastVectorHighlighter = false;
} else { } else {
throw new FetchPhaseExecutionException(context, "unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]"); throw new ElasticSearchIllegalArgumentException("unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]");
} }
if (!useFastVectorHighlighter) { if (!useFastVectorHighlighter) {
MapperHighlightEntry entry = cache.mappers.get(mapper); MapperHighlightEntry entry = cache.mappers.get(mapper);
@ -151,8 +152,14 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
Fragmenter fragmenter; Fragmenter fragmenter;
if (field.numberOfFragments() == 0) { if (field.numberOfFragments() == 0) {
fragmenter = new NullFragmenter(); fragmenter = new NullFragmenter();
} else { } else if (field.fragmenter() == null) {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize()); fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
} else if ("simple".equals(field.fragmenter())) {
fragmenter = new SimpleFragmenter(field.fragmentCharSize());
} else if ("span".equals(field.fragmenter())) {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
} else {
throw new ElasticSearchIllegalArgumentException("unknown fragmenter option [" + field.fragmenter() + "] for the field [" + field.field() + "]");
} }
Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]); Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]);

View File

@ -77,6 +77,7 @@ public class HighlighterParseElement implements SearchParseElement {
int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN; int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN;
char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS; char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS;
String globalHighlighterType = null; String globalHighlighterType = null;
String globalFragmenter = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) { if (token == XContentParser.Token.FIELD_NAME) {
@ -120,6 +121,8 @@ public class HighlighterParseElement implements SearchParseElement {
globalBoundaryChars = parser.text().toCharArray(); globalBoundaryChars = parser.text().toCharArray();
} else if ("type".equals(topLevelFieldName)) { } else if ("type".equals(topLevelFieldName)) {
globalHighlighterType = parser.text(); globalHighlighterType = parser.text();
} else if ("fragmenter".equals(topLevelFieldName)) {
globalFragmenter = parser.text();
} }
} else if (token == XContentParser.Token.START_OBJECT) { } else if (token == XContentParser.Token.START_OBJECT) {
if ("fields".equals(topLevelFieldName)) { if ("fields".equals(topLevelFieldName)) {
@ -166,6 +169,8 @@ public class HighlighterParseElement implements SearchParseElement {
field.boundaryChars(parser.text().toCharArray()); field.boundaryChars(parser.text().toCharArray());
} else if ("type".equals(fieldName)) { } else if ("type".equals(fieldName)) {
field.highlighterType(parser.text()); field.highlighterType(parser.text());
} else if ("fragmenter".equals(fieldName)) {
field.fragmenter(parser.text());
} }
} }
} }
@ -214,6 +219,9 @@ public class HighlighterParseElement implements SearchParseElement {
if (field.highlighterType() == null) { if (field.highlighterType() == null) {
field.highlighterType(globalHighlighterType); field.highlighterType(globalHighlighterType);
} }
if (field.fragmenter() == null) {
field.fragmenter(globalFragmenter);
}
} }
context.highlight(new SearchContextHighlight(fields)); context.highlight(new SearchContextHighlight(fields));

View File

@ -60,6 +60,8 @@ public class SearchContextHighlight {
private String highlighterType; private String highlighterType;
private String fragmenter;
private int boundaryMaxScan = -1; private int boundaryMaxScan = -1;
private char[] boundaryChars = null; private char[] boundaryChars = null;
@ -151,6 +153,14 @@ public class SearchContextHighlight {
this.highlighterType = type; this.highlighterType = type;
} }
/**
* Returns the name of the fragmenter configured for this field.
*
* @return the fragmenter name, or <tt>null</tt> if none has been set
*/
public String fragmenter() {
return fragmenter;
}
/**
* Sets the name of the fragmenter to use for this field.
*
* @param fragmenter the fragmenter name; the value is stored as given, without validation
*/
public void fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
}
public int boundaryMaxScan() { public int boundaryMaxScan() {
return boundaryMaxScan; return boundaryMaxScan;
} }

View File

@ -20,14 +20,17 @@
package org.elasticsearch.test.integration.search.highlight; package org.elasticsearch.test.integration.search.highlight;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType; import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client; import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.indices.IndexMissingException; import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder; import org.elasticsearch.search.highlight.HighlightBuilder;
@ -49,6 +52,7 @@ import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.instanceOf;
import static org.testng.Assert.fail;
/** /**
* *
@ -915,8 +919,60 @@ public class HighlighterSearchTests extends AbstractNodesTests {
.addHighlightedField("tags", -1, 0) .addHighlightedField("tags", -1, 0)
.execute().actionGet(); .execute().actionGet();
assertThat(2, equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments().length)); assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
assertThat("this is a really long <em>tag</em> i would like to highlight", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string())); assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really long <em>tag</em> i would like to highlight"));
assertThat("here is another one that is very long and has the <em>tag</em> token near the end", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string())); assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long and has the <em>tag</em> token near the end"));
} }
/**
 * Exercises the <tt>fragmenter</tt> option of the plain highlighter: the "simple" and
 * "span" fragmenters must both produce the expected two fragments for a phrase query,
 * and an unknown fragmenter name must fail the search with a BAD_REQUEST shard failure.
 */
@Test
public void testPlainHighlightDifferentFragmenter() throws Exception {
    // Start from a clean index; a failing delete (e.g. nothing to delete) is not an error here.
    try {
        client.admin().indices().prepareDelete("test").execute().actionGet();
    } catch (Exception ignored) {
        // ignore
    }

    // Single shard, no replicas, with a plain string "tags" field.
    XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("tags").field("type", "string").endObject()
            .endObject().endObject().endObject();
    client.admin().indices().prepareCreate("test")
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("number_of_shards", 1).put("number_of_replicas", 0))
            .addMapping("type1", mapping)
            .execute().actionGet();

    // One document whose "tags" field holds two values, each containing the phrase "long tag".
    XContentBuilder document = jsonBuilder().startObject().field("tags",
            "this is a really long tag i would like to highlight",
            "here is another one that is very long tag and has the tag token near the end").endObject();
    client.prepareIndex("test", "type1", "1")
            .setSource(document)
            .setRefresh(true).execute().actionGet();

    // Both supported fragmenters are expected to return the same fragments for this document.
    for (String fragmenterName : new String[]{"simple", "span"}) {
        HighlightBuilder.Field highlightedTags = new HighlightBuilder.Field("tags")
                .fragmentSize(-1).numOfFragments(2).fragmenter(fragmenterName);
        SearchResponse response = client.prepareSearch("test")
                .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
                .addHighlightedField(highlightedTags)
                .execute().actionGet();

        SearchHit firstHit = response.hits().hits()[0];
        assertThat(firstHit.highlightFields().get("tags").fragments().length, equalTo(2));
        assertThat(firstHit.highlightFields().get("tags").fragments()[0].string(),
                equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
        assertThat(firstHit.highlightFields().get("tags").fragments()[1].string(),
                equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));
    }

    // An unrecognized fragmenter name must be rejected as a bad request on the shard.
    try {
        client.prepareSearch("test")
                .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
                .addHighlightedField(new HighlightBuilder.Field("tags")
                        .fragmentSize(-1).numOfFragments(2).fragmenter("invalid"))
                .execute().actionGet();
        fail("Shouldn't get here");
    } catch (SearchPhaseExecutionException e) {
        assertThat(e.shardFailures()[0].status(), equalTo(RestStatus.BAD_REQUEST));
    }
}
} }