(fieldPhraseList.getPhraseList().iterator());
+ WeightedPhraseInfo phraseInfo = null;
+ int startOffset = 0;
+ while ((phraseInfo = queue.top()) != null) {
+ // if the phrase violates the border of previous fragment, discard
+ // it and try next phrase
+ if (phraseInfo.getStartOffset() < startOffset) {
+ queue.removeTop();
+ continue;
+ }
+
+ wpil.clear();
+ final int currentPhraseStartOffset = phraseInfo.getStartOffset();
+ int currentPhraseEndOffset = phraseInfo.getEndOffset();
+ int spanStart = Math.max(currentPhraseStartOffset - margin, startOffset);
+ int spanEnd = Math.max(currentPhraseEndOffset, spanStart + fragCharSize);
+ if (acceptPhrase(queue.removeTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) {
+ wpil.add(phraseInfo);
+ }
+ while ((phraseInfo = queue.top()) != null) { // pull until we crossed the current spanEnd
+ if (phraseInfo.getEndOffset() <= spanEnd) {
+ currentPhraseEndOffset = phraseInfo.getEndOffset();
+ if (acceptPhrase(queue.removeTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) {
+ wpil.add(phraseInfo);
+ }
+ } else {
+ break;
+ }
+ }
+ if (wpil.isEmpty()) {
+ continue;
+ }
+
+ final int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
+ // now recalculate the start and end position to "center" the result
+ final int newMargin = Math.max(0, (fragCharSize - matchLen) / 2);
+ // matchLen can be > fragCharSize prevent IAOOB here
+ spanStart = currentPhraseStartOffset - newMargin;
+ if (spanStart < startOffset) {
+ spanStart = startOffset;
+ }
+ // whatever is bigger here we grow this out
+ spanEnd = spanStart + Math.max(matchLen, fragCharSize);
+ startOffset = spanEnd;
+ fieldFragList.add(spanStart, spanEnd, wpil);
+ }
+ return fieldFragList;
+ }
+
+ /**
+ * A predicate to decide if the given {@link WeightedPhraseInfo} should be
+ * accepted as a highlighted phrase or if it should be discarded.
+ *
+ * The default implementation discards phrases that are composed of more
+ * than one term and where the matchLength exceeds the fragment character
+ * size.
+ *
+ * @param info
+ * the phrase info to accept
+ * @param matchLength
+ * the match length of the current phrase
+ * @param fragCharSize
+ * the configured fragment character size
+ * @return true
if this phrase info should be accepted as a
+ * highligh phrase
+ */
+ protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
+ return info.getTermsOffsets().size() <= 1 || matchLength <= fragCharSize;
+ }
+
+ private static final class IteratorQueue {
+ private final Iterator iter;
+ private T top;
+
+ public IteratorQueue(Iterator iter) {
+ this.iter = iter;
+ T removeTop = removeTop();
+ assert removeTop == null;
+ }
+
+ public T top() {
+ return top;
+ }
+
+ public T removeTop() {
+ T currentTop = top;
+ if (iter.hasNext()) {
+ top = iter.next();
+ } else {
+ top = null;
+ }
+ return currentTop;
+ }
+
+ }
+
+}
diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java
index 6bef7751641..28d3e64ad46 100644
--- a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java
+++ b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java
@@ -267,11 +267,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);
}
} else {
- if (field.fragmentOffset() == -1)
- fragListBuilder = new SimpleFragListBuilder();
- else
- fragListBuilder = new SimpleFragListBuilder(field.fragmentOffset());
-
+ fragListBuilder = field.fragmentOffset() == -1 ? new XSimpleFragListBuilder() : new XSimpleFragListBuilder(field.fragmentOffset());
if (field.scoreOrdered()) {
if (mapper.fieldType().stored()) {
fragmentsBuilder = new XScoreOrderFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner);
diff --git a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
index e1a8ddd0e56..65ffabec45a 100644
--- a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
+++ b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
@@ -19,6 +19,12 @@
package org.elasticsearch.test.integration.search.highlight;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
@@ -31,6 +37,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.MatchQueryBuilder;
+import org.elasticsearch.index.query.MatchQueryBuilder.Type;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.rest.RestStatus;
@@ -81,6 +88,69 @@ public class HighlighterSearchTests extends AbstractNodesTests {
return client("server1");
}
+
+ @Test
+ public void testEnsureNoNegativeOffsets() throws Exception {
+ try {
+ client.admin().indices().prepareDelete("test").execute().actionGet();
+ } catch (Exception e) {
+ // ignore
+ }
+
+ client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2))
+ .addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
+ // we don't store title, now lets see if it works...
+ .startObject("no_long_term").field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets").endObject()
+ .startObject("long_term").field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets").endObject()
+ .endObject().endObject().endObject())
+ .execute().actionGet();
+
+
+ client.prepareIndex("test", "type1", "1")
+ .setSource(XContentFactory.jsonBuilder().startObject()
+ .startArray("no_long_term")
+ .value("This is a test where foo is highlighed and should be highlighted")
+ .endArray()
+ .startArray("long_term")
+ .value("This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted")
+ .endArray()
+ .endObject())
+ .setRefresh(true).execute().actionGet();
+
+
+ SearchResponse search = client.prepareSearch()
+ .setQuery(matchQuery("long_term", "thisisaverylongwordandmakessurethisfails foo highlighed"))
+ .addHighlightedField("long_term", 18, 1)
+ .execute().actionGet();
+
+ assertThat(search.getHits().totalHits(), equalTo(1l));
+ assertThat(search.getHits().hits().length, equalTo(1));
+
+ assertThat(search.getHits().hits()[0].highlightFields().get("long_term").fragments().length, equalTo(1));
+ assertThat(search.getHits().hits()[0].highlightFields().get("long_term").fragments()[0].string(), equalTo("thisisaverylongwordandmakessurethisfails"));
+
+
+ search = client.prepareSearch()
+ .setQuery(matchQuery("no_long_term", "test foo highlighed").type(Type.PHRASE).slop(3))
+ .addHighlightedField("no_long_term", 18, 1).setHighlighterPostTags("").setHighlighterPreTags("")
+ .execute().actionGet();
+ assertThat(search.getHits().totalHits(), equalTo(1l));
+ assertThat(search.getHits().hits().length, equalTo(1));
+ assertThat(search.getHits().hits()[0].highlightFields().size(), equalTo(0));
+
+
+ search = client.prepareSearch()
+ .setQuery(matchQuery("no_long_term", "test foo highlighed").type(Type.PHRASE).slop(3))
+ .addHighlightedField("no_long_term", 30, 1).setHighlighterPostTags("").setHighlighterPreTags("")
+ .execute().actionGet();
+
+ assertThat(search.getHits().totalHits(), equalTo(1l));
+ assertThat(search.getHits().hits().length, equalTo(1));
+
+ assertThat(search.getHits().hits()[0].highlightFields().get("no_long_term").fragments().length, equalTo(1));
+ assertThat(search.getHits().hits()[0].highlightFields().get("no_long_term").fragments()[0].string(), equalTo("a test where foo is highlighed and"));
+ }
+
@Test
public void testSourceLookupHighlightingUsingPlainHighlighter() throws Exception {
try {