From f72d5c1907d2589cee92ca30a80981fcfadf51b6 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 6 Dec 2012 14:59:42 +0100 Subject: [PATCH] Expose fragmenter option for plain / normal highlighter. Closes #2465 --- .../search/highlight/HighlightBuilder.java | 22 +++++++ .../search/highlight/HighlightPhase.java | 13 +++- .../highlight/HighlighterParseElement.java | 8 +++ .../highlight/SearchContextHighlight.java | 10 +++ .../highlight/HighlighterSearchTests.java | 62 ++++++++++++++++++- 5 files changed, 109 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightBuilder.java b/src/main/java/org/elasticsearch/search/highlight/HighlightBuilder.java index 64024d971bd..10a68083c3e 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlightBuilder.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlightBuilder.java @@ -50,6 +50,7 @@ public class HighlightBuilder implements ToXContent { private String highlighterType; + private String fragmenter; /** * Adds a field to be highlighted with default fragment size of 100 characters, and @@ -188,6 +189,15 @@ public class HighlightBuilder implements ToXContent { return this; } + /** + * Sets what fragmenter to use to break up text that is eligible for highlighting. + * This option is only applicable when using plain / normal highlighter. + */ + public HighlightBuilder fragmenter(String fragmenter) { + this.fragmenter = fragmenter; + return this; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject("highlight"); @@ -212,6 +222,9 @@ public class HighlightBuilder implements ToXContent { if (highlighterType != null) { builder.field("type", highlighterType); } + if (fragmenter != null) { + builder.field("fragmenter", fragmenter); + } if (fields != null) { builder.startObject("fields"); for (Field field : fields) { @@ -231,6 +244,9 @@ public class HighlightBuilder implements ToXContent { if (field.highlighterType != null) { builder.field("type", field.highlighterType); } + if (field.fragmenter != null) { + builder.field("fragmenter", field.fragmenter); + } builder.endObject(); } @@ -248,6 +264,7 @@ public class HighlightBuilder implements ToXContent { int numOfFragments = -1; Boolean requireFieldMatch; String highlighterType; + String fragmenter; public Field(String name) { this.name = name; @@ -281,5 +298,10 @@ public class HighlightBuilder implements ToXContent { this.highlighterType = highlighterType; return this; } + + public Field fragmenter(String fragmenter) { + this.fragmenter = fragmenter; + return this; + } } } diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index 2fd967455a2..79bfbd4ab42 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -30,6 +30,7 @@ import org.apache.lucene.search.highlight.*; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.vectorhighlight.*; import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.FastStringReader; @@ -131,13 +132,13 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase { useFastVectorHighlighter = mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions(); } else if (field.highlighterType().equals("fast-vector-highlighter") || field.highlighterType().equals("fvh")) { if (!(mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions())) { - throw new FetchPhaseExecutionException(context, "the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"); + throw new ElasticSearchIllegalArgumentException("the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"); } useFastVectorHighlighter = true; } else if (field.highlighterType().equals("highlighter") || field.highlighterType().equals("plain")) { useFastVectorHighlighter = false; } else { - throw new FetchPhaseExecutionException(context, "unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]"); + throw new ElasticSearchIllegalArgumentException("unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]"); } if (!useFastVectorHighlighter) { MapperHighlightEntry entry = cache.mappers.get(mapper); @@ -151,8 +152,14 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase { Fragmenter fragmenter; if (field.numberOfFragments() == 0) { fragmenter = new NullFragmenter(); - } else { + } else if (field.fragmenter() == null) { fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize()); + } else if ("simple".equals(field.fragmenter())) { + fragmenter = new SimpleFragmenter(field.fragmentCharSize()); + } else if ("span".equals(field.fragmenter())) { + fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize()); + } else { + throw new ElasticSearchIllegalArgumentException("unknown fragmenter option [" + field.fragmenter() + "] for the field [" + field.field() + "]"); } Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]); diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java b/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java index 9a6674b4384..de89d6c4f2b 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java @@ -77,6 +77,7 @@ public class HighlighterParseElement implements SearchParseElement { int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN; char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS; String globalHighlighterType = null; + String globalFragmenter = null; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -120,6 +121,8 @@ public class HighlighterParseElement implements SearchParseElement { globalBoundaryChars = parser.text().toCharArray(); } else if ("type".equals(topLevelFieldName)) { globalHighlighterType = parser.text(); + } else if ("fragmenter".equals(topLevelFieldName)) { + globalFragmenter = parser.text(); } } else if (token == XContentParser.Token.START_OBJECT) { if ("fields".equals(topLevelFieldName)) { @@ -166,6 +169,8 @@ public class HighlighterParseElement implements SearchParseElement { field.boundaryChars(parser.text().toCharArray()); } else if ("type".equals(fieldName)) { field.highlighterType(parser.text()); + } else if ("fragmenter".equals(fieldName)) { + field.fragmenter(parser.text()); } } } @@ -214,6 +219,9 @@ public class HighlighterParseElement implements SearchParseElement { if (field.highlighterType() == null) { field.highlighterType(globalHighlighterType); } + if (field.fragmenter() == null) { + field.fragmenter(globalFragmenter); + } } context.highlight(new SearchContextHighlight(fields)); diff --git a/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java b/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java index c70726f6cf5..a645f479304 100644 --- a/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java +++ b/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java @@ -60,6 +60,8 @@ public class SearchContextHighlight { private String highlighterType; + private String fragmenter; + private int boundaryMaxScan = -1; private char[] boundaryChars = null; @@ -151,6 +153,14 @@ public class SearchContextHighlight { this.highlighterType = type; } + public String fragmenter() { + return fragmenter; + } + + public void fragmenter(String fragmenter) { + this.fragmenter = fragmenter; + } + public int boundaryMaxScan() { return boundaryMaxScan; } diff --git a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java index 6a6568305fe..d78800e14d9 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java @@ -20,14 +20,17 @@ package org.elasticsearch.test.integration.search.highlight; import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.client.Client; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.indices.IndexMissingException; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.highlight.HighlightBuilder; @@ -49,6 +52,7 @@ import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; +import static org.testng.Assert.fail; /** * @@ -915,8 +919,60 @@ public class HighlighterSearchTests extends AbstractNodesTests { .addHighlightedField("tags", -1, 0) .execute().actionGet(); - assertThat(2, equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments().length)); - assertThat("this is a really long tag i would like to highlight", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string())); - assertThat("here is another one that is very long and has the tag token near the end", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string())); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2)); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really long tag i would like to highlight")); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long and has the tag token near the end")); } + + @Test + public void testPlainHighlightDifferentFragmenter() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + + client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder() + .put("number_of_shards", 1).put("number_of_replicas", 0)) + .addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") + .startObject("tags").field("type", "string").endObject() + .endObject().endObject().endObject()) + .execute().actionGet(); + + client.prepareIndex("test", "type1", "1") + .setSource(jsonBuilder().startObject().field("tags", + "this is a really long tag i would like to highlight", + "here is another one that is very long tag and has the tag token near the end").endObject()) + .setRefresh(true).execute().actionGet(); + + SearchResponse response = client.prepareSearch("test") + .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE)) + .addHighlightedField(new HighlightBuilder.Field("tags") + .fragmentSize(-1).numOfFragments(2).fragmenter("simple")) + .execute().actionGet(); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2)); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really long tag i would like to highlight")); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long tag and has the tag token near the end")); + + response = client.prepareSearch("test") + .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE)) + .addHighlightedField(new HighlightBuilder.Field("tags") + .fragmentSize(-1).numOfFragments(2).fragmenter("span")) + .execute().actionGet(); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2)); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really long tag i would like to highlight")); + assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long tag and has the tag token near the end")); + + try { + client.prepareSearch("test") + .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE)) + .addHighlightedField(new HighlightBuilder.Field("tags") + .fragmentSize(-1).numOfFragments(2).fragmenter("invalid")) + .execute().actionGet(); + fail("Shouldn't get here"); + } catch (SearchPhaseExecutionException e) { + assertThat(e.shardFailures()[0].status(), equalTo(RestStatus.BAD_REQUEST)); + } + } + }