Better fix for multi-valued (mv) field highlighting issue #1994

This commit is contained in:
Martijn van Groningen 2012-06-18 22:45:17 +02:00 committed by Shay Banon
parent aebd27afbd
commit d66f401ce6
2 changed files with 109 additions and 41 deletions

View File

@ -69,47 +69,8 @@ public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder {
return null; return null;
} }
if (discreteMultiValueHighlighting && values.length > fragInfos.size()) { if (discreteMultiValueHighlighting && values.length > 1) {
Map<Field, List<FieldFragList.WeightedFragInfo>> fieldsWeightedFragInfo = new HashMap<Field, List<FieldFragList.WeightedFragInfo>>(); fragInfos = discreteMultiValueHighlighting(fragInfos, values);
int startOffset = 0;
int endOffset = 0;
for (Field value : values) {
endOffset += value.stringValue().length();
List<FieldFragList.WeightedFragInfo.SubInfo> fieldToSubInfos = new ArrayList<FieldFragList.WeightedFragInfo.SubInfo>();
List<FieldFragList.WeightedFragInfo> fieldToWeightedFragInfos = new ArrayList<FieldFragList.WeightedFragInfo>();
fieldsWeightedFragInfo.put(value, fieldToWeightedFragInfos);
for (FieldFragList.WeightedFragInfo fragInfo : fragInfos) {
int weightedFragInfoStartOffset = startOffset;
if (fragInfo.getStartOffset() > startOffset && fragInfo.getStartOffset() < endOffset) {
weightedFragInfoStartOffset = fragInfo.getStartOffset();
}
int weightedFragInfoEndOffset = endOffset;
if (fragInfo.getEndOffset() > startOffset && fragInfo.getEndOffset() < endOffset) {
weightedFragInfoEndOffset = fragInfo.getEndOffset();
}
fieldToWeightedFragInfos.add(new WeightedFragInfo(weightedFragInfoStartOffset, weightedFragInfoEndOffset, fragInfo.getTotalBoost(), fieldToSubInfos));
for (FieldFragList.WeightedFragInfo.SubInfo subInfo : fragInfo.getSubInfos()) {
for (FieldPhraseList.WeightedPhraseInfo.Toffs toffs : subInfo.getTermsOffsets()) {
if (toffs.getStartOffset() >= startOffset && toffs.getEndOffset() < endOffset) {
fieldToSubInfos.add(subInfo);
}
}
}
}
startOffset = endOffset + 1;
}
fragInfos.clear();
for (Map.Entry<Field, List<FieldFragList.WeightedFragInfo>> entry : fieldsWeightedFragInfo.entrySet()) {
fragInfos.addAll(entry.getValue());
}
Collections.sort(fragInfos, new Comparator<FieldFragList.WeightedFragInfo>() {
public int compare(FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2) {
return info1.getStartOffset() - info2.getStartOffset();
}
});
} }
fragInfos = getWeightedFragInfoList(fragInfos); fragInfos = getWeightedFragInfoList(fragInfos);
@ -123,6 +84,92 @@ public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder {
return fragments.toArray(new String[fragments.size()]); return fragments.toArray(new String[fragments.size()]);
} }
/**
 * Re-maps fragment infos onto the individual values of a multi-valued field so that
 * no returned fragment info is wider than the value it is assigned to.
 * <p>
 * The values in {@code fields} are walked in order as if laid out one after another,
 * each non-empty value occupying {@code stringValue().length() + 1} positions and each
 * empty value occupying a single position. For every incoming fragment info:
 * <ul>
 * <li>if its start and end offsets both lie within the range of the value being
 * examined, it is kept unchanged;</li>
 * <li>otherwise, if it carries no sub infos, it is skipped altogether;</li>
 * <li>otherwise a new fragment info clipped to the value's range is created for each
 * value it reaches, receiving the term offsets that lie within that value.</li>
 * </ul>
 * Note that term offsets handed to a clipped fragment info are removed from the
 * sub infos of the incoming fragment info, and sub infos left without term offsets are
 * removed as well, so the elements of {@code fragInfos} are modified by this call.
 *
 * @param fragInfos the fragment infos to distribute over the field values
 * @param fields    the values of the highlighted field, in document order
 * @return a new list holding the kept and the clipped fragment infos, ordered by start offset
 */
protected List<FieldFragList.WeightedFragInfo> discreteMultiValueHighlighting(List<FieldFragList.WeightedFragInfo> fragInfos, Field[] fields) {
    // One bucket per field name; the buckets are flattened and sorted once everything is assigned.
    Map<String, List<FieldFragList.WeightedFragInfo>> buckets = new HashMap<String, List<FieldFragList.WeightedFragInfo>>();
    for (Field f : fields) {
        buckets.put(f.name(), new ArrayList<FieldFragList.WeightedFragInfo>());
    }

    nextFragment:
    for (FieldFragList.WeightedFragInfo candidate : fragInfos) {
        int valueEnd = 0;
        for (Field f : fields) {
            String text = f.stringValue();
            if (text.isEmpty()) {
                // An empty value still takes up one position.
                valueEnd++;
                continue;
            }
            int valueStart = valueEnd;
            valueEnd = valueStart + text.length() + 1; // + 1 for going to next field with same name.

            int candidateStart = candidate.getStartOffset();
            int candidateEnd = candidate.getEndOffset();

            // Fragment sits entirely inside this value: keep it untouched.
            boolean insideValue = candidateStart >= valueStart && candidateEnd >= valueStart
                    && candidateStart <= valueEnd && candidateEnd <= valueEnd;
            if (insideValue) {
                buckets.get(f.name()).add(candidate);
                continue nextFragment;
            }

            List<FieldFragList.WeightedFragInfo.SubInfo> remaining = candidate.getSubInfos();
            if (remaining.isEmpty()) {
                // Nothing (left) to hand out for this fragment.
                continue nextFragment;
            }

            // This value ends before the fragment, or before its first remaining term offset, starts.
            FieldPhraseList.WeightedPhraseInfo.Toffs leading = remaining.get(0).getTermsOffsets().get(0);
            if (candidateStart >= valueEnd || leading.getStartOffset() >= valueEnd) {
                continue;
            }

            // Clip the fragment boundaries to the range of the current value.
            int clippedStart = (candidateStart > valueStart && candidateStart < valueEnd) ? candidateStart : valueStart;
            int clippedEnd = (candidateEnd > valueStart && candidateEnd < valueEnd) ? candidateEnd : valueEnd;

            // The replacement is created first and its sub info list is filled afterwards.
            List<WeightedFragInfo.SubInfo> pieceSubInfos = new ArrayList<WeightedFragInfo.SubInfo>();
            WeightedFragInfo piece = new WeightedFragInfo(clippedStart, clippedEnd, candidate.getTotalBoost(), pieceSubInfos);

            for (Iterator<FieldFragList.WeightedFragInfo.SubInfo> subIt = remaining.iterator(); subIt.hasNext(); ) {
                FieldFragList.WeightedFragInfo.SubInfo source = subIt.next();
                List<FieldPhraseList.WeightedPhraseInfo.Toffs> claimed = new ArrayList<FieldPhraseList.WeightedPhraseInfo.Toffs>();
                for (Iterator<FieldPhraseList.WeightedPhraseInfo.Toffs> toffsIt = source.getTermsOffsets().iterator(); toffsIt.hasNext(); ) {
                    FieldPhraseList.WeightedPhraseInfo.Toffs termOffsets = toffsIt.next();
                    if (termOffsets.getStartOffset() < valueStart || termOffsets.getEndOffset() > valueEnd) {
                        continue;
                    }
                    // Move the term offsets from the original sub info to the clipped one.
                    claimed.add(termOffsets);
                    toffsIt.remove();
                }
                if (!claimed.isEmpty()) {
                    pieceSubInfos.add(new FieldFragList.WeightedFragInfo.SubInfo(source.text, claimed, source.getSeqnum()));
                }
                if (source.getTermsOffsets().isEmpty()) {
                    // Fully consumed sub infos are dropped from the original fragment.
                    subIt.remove();
                }
            }
            buckets.get(f.name()).add(piece);
        }
    }

    List<FieldFragList.WeightedFragInfo> ordered = new ArrayList<FieldFragList.WeightedFragInfo>();
    for (List<FieldFragList.WeightedFragInfo> bucket : buckets.values()) {
        ordered.addAll(bucket);
    }
    Comparator<FieldFragList.WeightedFragInfo> byStartOffset = new Comparator<FieldFragList.WeightedFragInfo>() {
        public int compare(FieldFragList.WeightedFragInfo left, FieldFragList.WeightedFragInfo right) {
            return left.getStartOffset() - right.getStartOffset();
        }
    };
    Collections.sort(ordered, byStartOffset);
    return ordered;
}
private static class WeightedFragInfo extends FieldFragList.WeightedFragInfo { private static class WeightedFragInfo extends FieldFragList.WeightedFragInfo {
private final static List<FieldPhraseList.WeightedPhraseInfo> EMPTY = Collections.emptyList(); private final static List<FieldPhraseList.WeightedPhraseInfo> EMPTY = Collections.emptyList();

View File

@ -212,6 +212,16 @@ public class HighlighterSearchTests extends AbstractNodesTests {
.endObject()) .endObject())
.setRefresh(true).execute().actionGet(); .setRefresh(true).execute().actionGet();
client.prepareIndex("test", "type1", "2")
.setSource(XContentFactory.jsonBuilder().startObject()
.startArray("titleTV")
.value("some text to highlight")
.value("highlight other text")
.endArray()
.endObject())
.setRefresh(true).execute().actionGet();
SearchResponse search = client.prepareSearch() SearchResponse search = client.prepareSearch()
.setQuery(fieldQuery("title", "bug")) .setQuery(fieldQuery("title", "bug"))
.addHighlightedField("title", -1, 2) .addHighlightedField("title", -1, 2)
@ -228,6 +238,17 @@ public class HighlighterSearchTests extends AbstractNodesTests {
// assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch")); // assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("highlighting <em>bug</em> present in elasticsearch")); // FastVectorHighlighter starts highlighting from startOffset - margin assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("highlighting <em>bug</em> present in elasticsearch")); // FastVectorHighlighter starts highlighting from startOffset - margin
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[1], equalTo("The <em>bug</em> is bugging us")); assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[1], equalTo("The <em>bug</em> is bugging us"));
search = client.prepareSearch()
.setQuery(fieldQuery("titleTV", "highlight"))
.addHighlightedField("titleTV", -1, 2)
. execute().actionGet();
assertThat(search.hits().totalHits(), equalTo(1l));
assertThat(search.hits().hits().length, equalTo(1));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments().length, equalTo(2));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("text to <em>highlight</em>"));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[1], equalTo("<em>highlight</em> other text"));
} }
@Test @Test