Added highlight offset for highlighted terms in fragments
This commit is contained in:
parent
19c47cdceb
commit
cee29dedb2
|
@ -0,0 +1,96 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple implementation of {@link FragListBuilder}.
|
||||||
|
*/
|
||||||
|
public class MarginFragListBuilder implements FragListBuilder {
|
||||||
|
|
||||||
|
private static final int DEFAULT_MARGIN = 6;
|
||||||
|
private int margin;
|
||||||
|
private int minFragCharSize;
|
||||||
|
|
||||||
|
public MarginFragListBuilder() {
|
||||||
|
this(DEFAULT_MARGIN);
|
||||||
|
}
|
||||||
|
|
||||||
|
public MarginFragListBuilder(int startMargin) {
|
||||||
|
margin = startMargin;
|
||||||
|
minFragCharSize = 3*margin;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize) {
|
||||||
|
if( fragCharSize < minFragCharSize )
|
||||||
|
throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " +
|
||||||
|
minFragCharSize + " or higher." );
|
||||||
|
|
||||||
|
FieldFragList ffl = new FieldFragList( fragCharSize );
|
||||||
|
|
||||||
|
List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
|
||||||
|
Iterator<WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator();
|
||||||
|
WeightedPhraseInfo phraseInfo = null;
|
||||||
|
int startOffset = 0;
|
||||||
|
boolean taken = false;
|
||||||
|
while( true ){
|
||||||
|
if( !taken ){
|
||||||
|
if( !ite.hasNext() ) break;
|
||||||
|
phraseInfo = ite.next();
|
||||||
|
}
|
||||||
|
taken = false;
|
||||||
|
if( phraseInfo == null ) break;
|
||||||
|
|
||||||
|
// if the phrase violates the border of previous fragment, discard it and try next phrase
|
||||||
|
if( phraseInfo.getStartOffset() < startOffset ) continue;
|
||||||
|
|
||||||
|
wpil.clear();
|
||||||
|
wpil.add( phraseInfo );
|
||||||
|
int st = phraseInfo.getStartOffset() - margin < startOffset ?
|
||||||
|
startOffset : phraseInfo.getStartOffset() - margin;
|
||||||
|
int en = st + fragCharSize;
|
||||||
|
if( phraseInfo.getEndOffset() > en )
|
||||||
|
en = phraseInfo.getEndOffset();
|
||||||
|
startOffset = en;
|
||||||
|
|
||||||
|
while( true ){
|
||||||
|
if( ite.hasNext() ){
|
||||||
|
phraseInfo = ite.next();
|
||||||
|
taken = true;
|
||||||
|
if( phraseInfo == null ) break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
if( phraseInfo.getEndOffset() <= en )
|
||||||
|
wpil.add( phraseInfo );
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ffl.add( st, en, wpil );
|
||||||
|
}
|
||||||
|
return ffl;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -223,13 +223,18 @@ public class HighlightPhase implements SearchHitPhase {
|
||||||
FragmentsBuilder fragmentsBuilder;
|
FragmentsBuilder fragmentsBuilder;
|
||||||
if (field.numberOfFragments() == 0) {
|
if (field.numberOfFragments() == 0) {
|
||||||
fragListBuilder = new SingleFragListBuilder();
|
fragListBuilder = new SingleFragListBuilder();
|
||||||
|
|
||||||
if (fieldMapper.stored()) {
|
if (fieldMapper.stored()) {
|
||||||
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
|
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
|
||||||
} else {
|
} else {
|
||||||
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
|
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if(field.fragmentOffset() == -1)
|
||||||
fragListBuilder = new SimpleFragListBuilder();
|
fragListBuilder = new SimpleFragListBuilder();
|
||||||
|
else
|
||||||
|
fragListBuilder = new MarginFragListBuilder(field.fragmentOffset());
|
||||||
|
|
||||||
if (field.scoreOrdered()) {
|
if (field.scoreOrdered()) {
|
||||||
if (fieldMapper.stored()) {
|
if (fieldMapper.stored()) {
|
||||||
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
|
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
|
||||||
|
|
|
@ -137,6 +137,8 @@ public class HighlighterParseElement implements SearchParseElement {
|
||||||
field.fragmentCharSize(parser.intValue());
|
field.fragmentCharSize(parser.intValue());
|
||||||
} else if ("number_of_fragments".equals(fieldName) || "numberOfFragments".equals(fieldName)) {
|
} else if ("number_of_fragments".equals(fieldName) || "numberOfFragments".equals(fieldName)) {
|
||||||
field.numberOfFragments(parser.intValue());
|
field.numberOfFragments(parser.intValue());
|
||||||
|
} else if ("fragment_offset".equals(fieldName) || "fragmentOffset".equals(fieldName)) {
|
||||||
|
field.fragmentOffset(parser.intValue());
|
||||||
} else if ("highlight_filter".equals(fieldName) || "highlightFilter".equals(fieldName)) {
|
} else if ("highlight_filter".equals(fieldName) || "highlightFilter".equals(fieldName)) {
|
||||||
field.highlightFilter(parser.booleanValue());
|
field.highlightFilter(parser.booleanValue());
|
||||||
} else if ("score".equals(fieldName)) {
|
} else if ("score".equals(fieldName)) {
|
||||||
|
|
|
@ -44,6 +44,8 @@ public class SearchContextHighlight {
|
||||||
|
|
||||||
private int numberOfFragments = -1;
|
private int numberOfFragments = -1;
|
||||||
|
|
||||||
|
private int fragmentOffset = -1;
|
||||||
|
|
||||||
private String[] preTags;
|
private String[] preTags;
|
||||||
|
|
||||||
private String[] postTags;
|
private String[] postTags;
|
||||||
|
@ -76,6 +78,14 @@ public class SearchContextHighlight {
|
||||||
this.numberOfFragments = numberOfFragments;
|
this.numberOfFragments = numberOfFragments;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the value of the {@code fragmentOffset} field.
 * The field is initialised to {@code -1}; HighlightPhase treats {@code -1} as
 * "not set" and otherwise passes the value to MarginFragListBuilder as its margin.
 */
public int fragmentOffset() {
|
||||||
|
return fragmentOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Sets the {@code fragmentOffset} field.
 * HighlighterParseElement calls this with the integer value of the
 * {@code fragment_offset} / {@code fragmentOffset} request field.
 *
 * @param fragmentOffset the value to store
 */
public void fragmentOffset(int fragmentOffset) {
|
||||||
|
this.fragmentOffset = fragmentOffset;
|
||||||
|
}
|
||||||
|
|
||||||
public String[] preTags() {
|
public String[] preTags() {
|
||||||
return preTags;
|
return preTags;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue