Fixes the highlighting issue for multi-valued fields described in issue #1994
This commit is contained in:
parent
9798d3e8fb
commit
1319ed9322
|
@ -0,0 +1,136 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.search.highlight.Encoder;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract {@link FragmentsBuilder} implementation that detects whether highlight hits occurred on a field that is
|
||||||
|
* multivalued (Basically fields that have the same name) and splits the highlight snippets according to a single field
|
||||||
|
* boundary. This avoids that a highlight hit is shown as one hit whilst it is actually a hit on multiple fields.
|
||||||
|
*/
|
||||||
|
public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder {
|
||||||
|
|
||||||
|
private boolean discreteMultiValueHighlighting = true;
|
||||||
|
|
||||||
|
protected AbstractFragmentsBuilder(){
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected AbstractFragmentsBuilder(BoundaryScanner boundaryScanner){
|
||||||
|
super(boundaryScanner);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected AbstractFragmentsBuilder( String[] preTags, String[] postTags ){
|
||||||
|
super(preTags, postTags);
|
||||||
|
}
|
||||||
|
|
||||||
|
public AbstractFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs) {
|
||||||
|
super( preTags, postTags, bs );
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDiscreteMultiValueHighlighting(boolean discreteMultiValueHighlighting) {
|
||||||
|
this.discreteMultiValueHighlighting = discreteMultiValueHighlighting;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] createFragments(IndexReader reader, int docId,
|
||||||
|
String fieldName, FieldFragList fieldFragList, int maxNumFragments,
|
||||||
|
String[] preTags, String[] postTags, Encoder encoder) throws IOException {
|
||||||
|
if (maxNumFragments < 0) {
|
||||||
|
throw new IllegalArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> fragments = new ArrayList<String>(maxNumFragments);
|
||||||
|
List<FieldFragList.WeightedFragInfo> fragInfos = fieldFragList.getFragInfos();
|
||||||
|
Field[] values = getFields(reader, docId, fieldName);
|
||||||
|
if (values.length == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (discreteMultiValueHighlighting && values.length > fragInfos.size()) {
|
||||||
|
Map<Field, List<FieldFragList.WeightedFragInfo>> fieldsWeightedFragInfo = new HashMap<Field, List<FieldFragList.WeightedFragInfo>>();
|
||||||
|
int startOffset = 0;
|
||||||
|
int endOffset = 0;
|
||||||
|
for (Field value : values) {
|
||||||
|
endOffset += value.stringValue().length();
|
||||||
|
List<FieldFragList.WeightedFragInfo.SubInfo> fieldToSubInfos = new ArrayList<FieldFragList.WeightedFragInfo.SubInfo>();
|
||||||
|
List<FieldFragList.WeightedFragInfo> fieldToWeightedFragInfos = new ArrayList<FieldFragList.WeightedFragInfo>();
|
||||||
|
fieldsWeightedFragInfo.put(value, fieldToWeightedFragInfos);
|
||||||
|
for (FieldFragList.WeightedFragInfo fragInfo : fragInfos) {
|
||||||
|
int weightedFragInfoStartOffset = startOffset;
|
||||||
|
if (fragInfo.getStartOffset() > startOffset && fragInfo.getStartOffset() < endOffset) {
|
||||||
|
weightedFragInfoStartOffset = fragInfo.getStartOffset();
|
||||||
|
}
|
||||||
|
int weightedFragInfoEndOffset = endOffset;
|
||||||
|
if (fragInfo.getEndOffset() > startOffset && fragInfo.getEndOffset() < endOffset) {
|
||||||
|
weightedFragInfoEndOffset = fragInfo.getEndOffset();
|
||||||
|
}
|
||||||
|
|
||||||
|
fieldToWeightedFragInfos.add(new WeightedFragInfo(weightedFragInfoStartOffset, weightedFragInfoEndOffset, fragInfo.getTotalBoost(), fieldToSubInfos));
|
||||||
|
for (FieldFragList.WeightedFragInfo.SubInfo subInfo : fragInfo.getSubInfos()) {
|
||||||
|
for (FieldPhraseList.WeightedPhraseInfo.Toffs toffs : subInfo.getTermsOffsets()) {
|
||||||
|
if (toffs.getStartOffset() >= startOffset && toffs.getEndOffset() < endOffset) {
|
||||||
|
fieldToSubInfos.add(subInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
startOffset = endOffset + 1;
|
||||||
|
}
|
||||||
|
fragInfos.clear();
|
||||||
|
for (Map.Entry<Field, List<FieldFragList.WeightedFragInfo>> entry : fieldsWeightedFragInfo.entrySet()) {
|
||||||
|
fragInfos.addAll(entry.getValue());
|
||||||
|
}
|
||||||
|
Collections.sort(fragInfos, new Comparator<FieldFragList.WeightedFragInfo>() {
|
||||||
|
|
||||||
|
public int compare(FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2) {
|
||||||
|
return info1.getStartOffset() - info2.getStartOffset();
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
fragInfos = getWeightedFragInfoList(fragInfos);
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder buffer = new StringBuilder();
|
||||||
|
int[] nextValueIndex = {0};
|
||||||
|
for (int n = 0; n < maxNumFragments && n < fragInfos.size(); n++) {
|
||||||
|
FieldFragList.WeightedFragInfo fragInfo = fragInfos.get(n);
|
||||||
|
fragments.add(makeFragment(buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder));
|
||||||
|
}
|
||||||
|
return fragments.toArray(new String[fragments.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class WeightedFragInfo extends FieldFragList.WeightedFragInfo {
|
||||||
|
|
||||||
|
private final static List<FieldPhraseList.WeightedPhraseInfo> EMPTY = Collections.emptyList();
|
||||||
|
|
||||||
|
private WeightedFragInfo(int startOffset, int endOffset, float totalBoost, List<FieldFragList.WeightedFragInfo.SubInfo> subInfos) {
|
||||||
|
super(startOffset, endOffset, EMPTY);
|
||||||
|
this.subInfos = subInfos;
|
||||||
|
this.totalBoost = totalBoost;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,78 @@
|
||||||
|
package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of FragmentsBuilder that outputs score-order fragments.
|
||||||
|
*/
|
||||||
|
public class ScoreOrderFragmentsBuilder extends AbstractFragmentsBuilder {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a constructor.
|
||||||
|
*/
|
||||||
|
public ScoreOrderFragmentsBuilder(){
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a constructor.
|
||||||
|
*
|
||||||
|
* @param preTags array of pre-tags for markup terms.
|
||||||
|
* @param postTags array of post-tags for markup terms.
|
||||||
|
*/
|
||||||
|
public ScoreOrderFragmentsBuilder(String[] preTags, String[] postTags){
|
||||||
|
super( preTags, postTags );
|
||||||
|
}
|
||||||
|
|
||||||
|
public ScoreOrderFragmentsBuilder(BoundaryScanner bs){
|
||||||
|
super( bs );
|
||||||
|
}
|
||||||
|
|
||||||
|
public ScoreOrderFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs){
|
||||||
|
super( preTags, postTags, bs );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sort by score the list of WeightedFragInfo
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src ) {
|
||||||
|
Collections.sort( src, new ScoreComparator() );
|
||||||
|
return src;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ScoreComparator implements Comparator<WeightedFragInfo> {
|
||||||
|
|
||||||
|
public int compare( WeightedFragInfo o1, WeightedFragInfo o2 ) {
|
||||||
|
if( o1.totalBoost > o2.totalBoost ) return -1;
|
||||||
|
else if( o1.totalBoost < o2.totalBoost ) return 1;
|
||||||
|
// if same score then check startOffset
|
||||||
|
else{
|
||||||
|
if( o1.startOffset < o2.startOffset ) return -1;
|
||||||
|
else if( o1.startOffset > o2.startOffset ) return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,62 @@
|
||||||
|
package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple implementation of FragmentsBuilder.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class SimpleFragmentsBuilder extends AbstractFragmentsBuilder {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a constructor.
|
||||||
|
*/
|
||||||
|
public SimpleFragmentsBuilder() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a constructor.
|
||||||
|
*
|
||||||
|
* @param preTags array of pre-tags for markup terms.
|
||||||
|
* @param postTags array of post-tags for markup terms.
|
||||||
|
*/
|
||||||
|
public SimpleFragmentsBuilder(String[] preTags, String[] postTags) {
|
||||||
|
super( preTags, postTags );
|
||||||
|
}
|
||||||
|
|
||||||
|
public SimpleFragmentsBuilder(BoundaryScanner bs) {
|
||||||
|
super( bs );
|
||||||
|
}
|
||||||
|
|
||||||
|
public SimpleFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs) {
|
||||||
|
super( preTags, postTags, bs );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* do nothing. return the source list.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src ) {
|
||||||
|
return src;
|
||||||
|
}
|
||||||
|
}
|
|
@ -220,9 +220,9 @@ public class HighlightPhase implements FetchSubPhase {
|
||||||
String[] fragments = null;
|
String[] fragments = null;
|
||||||
// number_of_fragments is set to 0 but we have a multivalued field
|
// number_of_fragments is set to 0 but we have a multivalued field
|
||||||
if (field.numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
|
if (field.numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
|
||||||
fragments = new String[1];
|
fragments = new String[fragsList.size()];
|
||||||
for (int i = 0; i < fragsList.size(); i++) {
|
for (int i = 0; i < fragsList.size(); i++) {
|
||||||
fragments[0] = (fragments[0] != null ? (fragments[0] + " ") : "") + fragsList.get(i).toString();
|
fragments[i] = fragsList.get(i).toString();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// refine numberOfFragments if needed
|
// refine numberOfFragments if needed
|
||||||
|
|
|
@ -122,7 +122,8 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
||||||
assertThat(search.hits().hits().length, equalTo(5));
|
assertThat(search.hits().hits().length, equalTo(5));
|
||||||
|
|
||||||
for (SearchHit hit : search.hits()) {
|
for (SearchHit hit : search.hits()) {
|
||||||
assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2"));
|
assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1"));
|
||||||
|
assertThat(hit.highlightFields().get("attachments.body").fragments()[1], equalTo("<em>attachment</em> 2"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,7 +168,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
||||||
|
|
||||||
search = client.prepareSearch()
|
search = client.prepareSearch()
|
||||||
.setQuery(fieldQuery("attachments.body", "attachment"))
|
.setQuery(fieldQuery("attachments.body", "attachment"))
|
||||||
.addHighlightedField("attachments.body", -1, 0)
|
.addHighlightedField("attachments.body", -1, 2)
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
|
assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
|
||||||
|
@ -176,10 +177,59 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
||||||
assertThat(search.hits().hits().length, equalTo(5));
|
assertThat(search.hits().hits().length, equalTo(5));
|
||||||
|
|
||||||
for (SearchHit hit : search.hits()) {
|
for (SearchHit hit : search.hits()) {
|
||||||
assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2"));
|
assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1"));
|
||||||
|
assertThat(hit.highlightFields().get("attachments.body").fragments()[1], equalTo("<em>attachment</em> 2"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHighlightIssue1994() throws Exception {
|
||||||
|
try {
|
||||||
|
client.admin().indices().prepareDelete("test").execute().actionGet();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
|
||||||
|
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||||
|
// we don't store title, now lets see if it works...
|
||||||
|
.startObject("title").field("type", "string").field("store", "no").endObject()
|
||||||
|
.startObject("titleTV").field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets").endObject()
|
||||||
|
.endObject().endObject().endObject())
|
||||||
|
.execute().actionGet();
|
||||||
|
|
||||||
|
|
||||||
|
client.prepareIndex("test", "type1", "1")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject()
|
||||||
|
.startArray("title")
|
||||||
|
.value("This is a test on the highlighting bug present in elasticsearch")
|
||||||
|
.value("The bug is bugging us")
|
||||||
|
.endArray()
|
||||||
|
.startArray("titleTV")
|
||||||
|
.value("This is a test on the highlighting bug present in elasticsearch")
|
||||||
|
.value("The bug is bugging us")
|
||||||
|
.endArray()
|
||||||
|
.endObject())
|
||||||
|
.setRefresh(true).execute().actionGet();
|
||||||
|
|
||||||
|
SearchResponse search = client.prepareSearch()
|
||||||
|
.setQuery(fieldQuery("title", "bug"))
|
||||||
|
.addHighlightedField("title", -1, 2)
|
||||||
|
.addHighlightedField("titleTV", -1, 2)
|
||||||
|
.execute().actionGet();
|
||||||
|
|
||||||
|
assertThat(search.hits().totalHits(), equalTo(1l));
|
||||||
|
assertThat(search.hits().hits().length, equalTo(1));
|
||||||
|
|
||||||
|
assertThat(search.hits().hits()[0].highlightFields().get("title").fragments().length, equalTo(2));
|
||||||
|
assertThat(search.hits().hits()[0].highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
|
||||||
|
assertThat(search.hits().hits()[0].highlightFields().get("title").fragments()[1], equalTo("The <em>bug</em> is bugging us"));
|
||||||
|
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments().length, equalTo(2));
|
||||||
|
// assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
|
||||||
|
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("highlighting <em>bug</em> present in elasticsearch")); // FastVectorHighlighter starts highlighting from startOffset - margin
|
||||||
|
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[1], equalTo("The <em>bug</em> is bugging us"));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPlainHighlighter() throws Exception {
|
public void testPlainHighlighter() throws Exception {
|
||||||
try {
|
try {
|
||||||
|
|
Loading…
Reference in New Issue