diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java b/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java
index 183b9d5f93f..2535655f9d8 100644
--- a/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java
+++ b/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java
@@ -69,47 +69,8 @@ public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder {
             return null;
         }
 
-        if (discreteMultiValueHighlighting && values.length > fragInfos.size()) {
-            Map<Field, List<FieldFragList.WeightedFragInfo>> fieldsWeightedFragInfo = new HashMap<Field, List<FieldFragList.WeightedFragInfo>>();
-            int startOffset = 0;
-            int endOffset = 0;
-            for (Field value : values) {
-                endOffset += value.stringValue().length();
-                List<FieldFragList.WeightedFragInfo.SubInfo> fieldToSubInfos = new ArrayList<FieldFragList.WeightedFragInfo.SubInfo>();
-                List<FieldFragList.WeightedFragInfo> fieldToWeightedFragInfos = new ArrayList<FieldFragList.WeightedFragInfo>();
-                fieldsWeightedFragInfo.put(value, fieldToWeightedFragInfos);
-                for (FieldFragList.WeightedFragInfo fragInfo : fragInfos) {
-                    int weightedFragInfoStartOffset = startOffset;
-                    if (fragInfo.getStartOffset() > startOffset && fragInfo.getStartOffset() < endOffset) {
-                        weightedFragInfoStartOffset = fragInfo.getStartOffset();
-                    }
-                    int weightedFragInfoEndOffset = endOffset;
-                    if (fragInfo.getEndOffset() > startOffset && fragInfo.getEndOffset() < endOffset) {
-                        weightedFragInfoEndOffset = fragInfo.getEndOffset();
-                    }
-
-                    fieldToWeightedFragInfos.add(new WeightedFragInfo(weightedFragInfoStartOffset, weightedFragInfoEndOffset, fragInfo.getTotalBoost(), fieldToSubInfos));
-                    for (FieldFragList.WeightedFragInfo.SubInfo subInfo : fragInfo.getSubInfos()) {
-                        for (FieldPhraseList.WeightedPhraseInfo.Toffs toffs : subInfo.getTermsOffsets()) {
-                            if (toffs.getStartOffset() >= startOffset && toffs.getEndOffset() < endOffset) {
-                                fieldToSubInfos.add(subInfo);
-                            }
-                        }
-                    }
-                }
-                startOffset = endOffset + 1;
-            }
-            fragInfos.clear();
-            for (Map.Entry<Field, List<FieldFragList.WeightedFragInfo>> entry : fieldsWeightedFragInfo.entrySet()) {
-                fragInfos.addAll(entry.getValue());
-            }
-            Collections.sort(fragInfos, new Comparator<FieldFragList.WeightedFragInfo>() {
-
-                public int compare(FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2) {
-                    return info1.getStartOffset() - info2.getStartOffset();
-                }
-
-            });
+        if (discreteMultiValueHighlighting && values.length > 1) {
+            fragInfos = discreteMultiValueHighlighting(fragInfos, values);
         }
 
         fragInfos = getWeightedFragInfoList(fragInfos);
@@ -123,6 +84,111 @@ public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder {
         return fragments.toArray(new String[fragments.size()]);
     }
 
+    /**
+     * Splits the given fragments along the boundaries of the individual values of a multi-valued field, so that
+     * every returned fragment lies within the offset range computed for a single value.
+     * <p/>
+     * A fragment that already lies within one value is kept as is. Any other fragment is clipped to each value it
+     * is matched against, and the term offsets falling inside that value are moved into the clipped fragment.
+     * <p/>
+     * Note: this removes entries from the sub-info and term-offset lists of the passed-in fragments.
+     *
+     * @param fragInfos the fragments to split
+     * @param fields    the values of the highlighted field, in the order their offsets accumulate
+     * @return a new list with the resulting fragments, sorted by ascending start offset
+     */
+    protected List<FieldFragList.WeightedFragInfo> discreteMultiValueHighlighting(List<FieldFragList.WeightedFragInfo> fragInfos, Field[] fields) {
+        Map<String, List<FieldFragList.WeightedFragInfo>> fieldNameToFragInfos = new HashMap<String, List<FieldFragList.WeightedFragInfo>>();
+        for (Field field : fields) {
+            fieldNameToFragInfos.put(field.name(), new ArrayList<FieldFragList.WeightedFragInfo>());
+        }
+
+        fragInfos:
+        for (FieldFragList.WeightedFragInfo fragInfo : fragInfos) {
+            int fieldStart;
+            int fieldEnd = 0;
+            for (Field field : fields) {
+                if (field.stringValue().isEmpty()) {
+                    fieldEnd++;
+                    continue;
+                }
+                fieldStart = fieldEnd;
+                fieldEnd += field.stringValue().length() + 1; // + 1 for going to next field with same name.
+
+                // The fragment lies entirely within this value: keep it unchanged and move on to the next fragment.
+                if (fragInfo.getStartOffset() >= fieldStart && fragInfo.getEndOffset() >= fieldStart &&
+                        fragInfo.getStartOffset() <= fieldEnd && fragInfo.getEndOffset() <= fieldEnd) {
+                    fieldNameToFragInfos.get(field.name()).add(fragInfo);
+                    continue fragInfos;
+                }
+
+                // No sub-infos left (e.g. all were moved into earlier values): nothing more to split for this fragment.
+                if (fragInfo.getSubInfos().isEmpty()) {
+                    continue fragInfos;
+                }
+
+                // The fragment, or its first remaining term offset, starts at or after the end of this value: try the next value.
+                FieldPhraseList.WeightedPhraseInfo.Toffs firstToffs = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0);
+                if (fragInfo.getStartOffset() >= fieldEnd || firstToffs.getStartOffset() >= fieldEnd) {
+                    continue;
+                }
+
+                // Clip the fragment to the boundaries of this value.
+                int fragStart = fieldStart;
+                if (fragInfo.getStartOffset() > fieldStart && fragInfo.getStartOffset() < fieldEnd) {
+                    fragStart = fragInfo.getStartOffset();
+                }
+
+                int fragEnd = fieldEnd;
+                if (fragInfo.getEndOffset() > fieldStart && fragInfo.getEndOffset() < fieldEnd) {
+                    fragEnd = fragInfo.getEndOffset();
+                }
+
+
+                List<FieldFragList.WeightedFragInfo.SubInfo> subInfos = new ArrayList<FieldFragList.WeightedFragInfo.SubInfo>();
+                WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, fragInfo.getTotalBoost(), subInfos);
+
+                // Move the term offsets that fall inside this value into the new fragment; drop sub-infos left without offsets.
+                Iterator<FieldFragList.WeightedFragInfo.SubInfo> subInfoIterator = fragInfo.getSubInfos().iterator();
+                while (subInfoIterator.hasNext()) {
+                    FieldFragList.WeightedFragInfo.SubInfo subInfo = subInfoIterator.next();
+                    List<FieldPhraseList.WeightedPhraseInfo.Toffs> toffsList = new ArrayList<FieldPhraseList.WeightedPhraseInfo.Toffs>();
+                    Iterator<FieldPhraseList.WeightedPhraseInfo.Toffs> toffsIterator = subInfo.getTermsOffsets().iterator();
+                    while (toffsIterator.hasNext()) {
+                        FieldPhraseList.WeightedPhraseInfo.Toffs toffs = toffsIterator.next();
+                        if (toffs.getStartOffset() >= fieldStart && toffs.getEndOffset() <= fieldEnd) {
+                            toffsList.add(toffs);
+                            toffsIterator.remove();
+                        }
+                    }
+                    if (!toffsList.isEmpty()) {
+                        subInfos.add(new FieldFragList.WeightedFragInfo.SubInfo(subInfo.text, toffsList, subInfo.getSeqnum()));
+                    }
+
+                    if (subInfo.getTermsOffsets().isEmpty()) {
+                        subInfoIterator.remove();
+                    }
+                }
+                fieldNameToFragInfos.get(field.name()).add(weightedFragInfo);
+            }
+        }
+
+        List<FieldFragList.WeightedFragInfo> result = new ArrayList<FieldFragList.WeightedFragInfo>();
+        for (List<FieldFragList.WeightedFragInfo> weightedFragInfos : fieldNameToFragInfos.values()) {
+            result.addAll(weightedFragInfos);
+        }
+        Collections.sort(result, new Comparator<FieldFragList.WeightedFragInfo>() {
+
+            // NOTE(review): subtraction assumes offsets are non-negative, so it cannot overflow - confirm.
+            public int compare(FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2) {
+                return info1.getStartOffset() - info2.getStartOffset();
+            }
+
+        });
+
+        return result;
+    }
+
     private static class WeightedFragInfo extends FieldFragList.WeightedFragInfo {
 
         private final static List EMPTY = Collections.emptyList();
diff --git a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
index 68ee978925d..7fc0cd23d0a 100644
--- a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
+++ b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
@@ -212,6 +212,16 @@ public class HighlighterSearchTests extends AbstractNodesTests {
                         .endObject())
                 .setRefresh(true).execute().actionGet();
 
+
+        client.prepareIndex("test", "type1", "2")
+                .setSource(XContentFactory.jsonBuilder().startObject()
+                        .startArray("titleTV")
+                        .value("some text to highlight")
+                        .value("highlight other text")
+                        .endArray()
+                        .endObject())
+                .setRefresh(true).execute().actionGet();
+
         SearchResponse search = client.prepareSearch()
                 .setQuery(fieldQuery("title", "bug"))
                 .addHighlightedField("title", -1, 2)
@@ -228,6 +238,17 @@ public class HighlighterSearchTests extends AbstractNodesTests {
 //        assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("This is a test on the highlighting bug present in elasticsearch"));
         assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("highlighting bug present in elasticsearch")); // FastVectorHighlighter starts highlighting from startOffset - margin
         assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[1], equalTo("The bug is bugging us"));
+
+        search = client.prepareSearch()
+                .setQuery(fieldQuery("titleTV", "highlight"))
+                .addHighlightedField("titleTV", -1, 2)
+                .execute().actionGet();
+
+        assertThat(search.hits().totalHits(), equalTo(1L));
+        assertThat(search.hits().hits().length, equalTo(1));
+        assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments().length, equalTo(2));
+        assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("text to highlight"));
+        assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[1], equalTo("highlight other text"));
     }
 
     @Test