Added highlight offset for highlighted terms in fragments
This commit is contained in:
parent
19c47cdceb
commit
cee29dedb2
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
|
||||
|
||||
/**
|
||||
* A {@link FragListBuilder} that starts each fragment a fixed number of characters (the margin) before its first highlighted phrase.
|
||||
*/
|
||||
public class MarginFragListBuilder implements FragListBuilder {
|
||||
|
||||
private static final int DEFAULT_MARGIN = 6;
|
||||
private int margin;
|
||||
private int minFragCharSize;
|
||||
|
||||
public MarginFragListBuilder() {
|
||||
this(DEFAULT_MARGIN);
|
||||
}
|
||||
|
||||
public MarginFragListBuilder(int startMargin) {
|
||||
margin = startMargin;
|
||||
minFragCharSize = 3*margin;
|
||||
}
|
||||
|
||||
public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize) {
|
||||
if( fragCharSize < minFragCharSize )
|
||||
throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " +
|
||||
minFragCharSize + " or higher." );
|
||||
|
||||
FieldFragList ffl = new FieldFragList( fragCharSize );
|
||||
|
||||
List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
|
||||
Iterator<WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator();
|
||||
WeightedPhraseInfo phraseInfo = null;
|
||||
int startOffset = 0;
|
||||
boolean taken = false;
|
||||
while( true ){
|
||||
if( !taken ){
|
||||
if( !ite.hasNext() ) break;
|
||||
phraseInfo = ite.next();
|
||||
}
|
||||
taken = false;
|
||||
if( phraseInfo == null ) break;
|
||||
|
||||
// if the phrase violates the border of previous fragment, discard it and try next phrase
|
||||
if( phraseInfo.getStartOffset() < startOffset ) continue;
|
||||
|
||||
wpil.clear();
|
||||
wpil.add( phraseInfo );
|
||||
int st = phraseInfo.getStartOffset() - margin < startOffset ?
|
||||
startOffset : phraseInfo.getStartOffset() - margin;
|
||||
int en = st + fragCharSize;
|
||||
if( phraseInfo.getEndOffset() > en )
|
||||
en = phraseInfo.getEndOffset();
|
||||
startOffset = en;
|
||||
|
||||
while( true ){
|
||||
if( ite.hasNext() ){
|
||||
phraseInfo = ite.next();
|
||||
taken = true;
|
||||
if( phraseInfo == null ) break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
if( phraseInfo.getEndOffset() <= en )
|
||||
wpil.add( phraseInfo );
|
||||
else
|
||||
break;
|
||||
}
|
||||
ffl.add( st, en, wpil );
|
||||
}
|
||||
return ffl;
|
||||
}
|
||||
|
||||
}
|
|
@ -223,13 +223,18 @@ public class HighlightPhase implements SearchHitPhase {
|
|||
FragmentsBuilder fragmentsBuilder;
|
||||
if (field.numberOfFragments() == 0) {
|
||||
fragListBuilder = new SingleFragListBuilder();
|
||||
|
||||
if (fieldMapper.stored()) {
|
||||
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
|
||||
} else {
|
||||
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
|
||||
}
|
||||
} else {
|
||||
fragListBuilder = new SimpleFragListBuilder();
|
||||
if(field.fragmentOffset() == -1)
|
||||
fragListBuilder = new SimpleFragListBuilder();
|
||||
else
|
||||
fragListBuilder = new MarginFragListBuilder(field.fragmentOffset());
|
||||
|
||||
if (field.scoreOrdered()) {
|
||||
if (fieldMapper.stored()) {
|
||||
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
|
||||
|
|
|
@ -137,6 +137,8 @@ public class HighlighterParseElement implements SearchParseElement {
|
|||
field.fragmentCharSize(parser.intValue());
|
||||
} else if ("number_of_fragments".equals(fieldName) || "numberOfFragments".equals(fieldName)) {
|
||||
field.numberOfFragments(parser.intValue());
|
||||
} else if ("fragment_offset".equals(fieldName) || "fragmentOffset".equals(fieldName)) {
|
||||
field.fragmentOffset(parser.intValue());
|
||||
} else if ("highlight_filter".equals(fieldName) || "highlightFilter".equals(fieldName)) {
|
||||
field.highlightFilter(parser.booleanValue());
|
||||
} else if ("score".equals(fieldName)) {
|
||||
|
|
|
@ -44,6 +44,8 @@ public class SearchContextHighlight {
|
|||
|
||||
private int numberOfFragments = -1;
|
||||
|
||||
private int fragmentOffset = -1;
|
||||
|
||||
private String[] preTags;
|
||||
|
||||
private String[] postTags;
|
||||
|
@ -76,6 +78,14 @@ public class SearchContextHighlight {
|
|||
this.numberOfFragments = numberOfFragments;
|
||||
}
|
||||
|
||||
/**
 * Returns the stored fragment offset.
 *
 * NOTE(review): a fragmentOffset field is declared with an initial value of -1 in this diff, and
 * the highlight phase appears to treat -1 as "not configured" — confirm both refer to this value.
 *
 * @return the current fragment offset
 */
public int fragmentOffset() {
|
||||
return fragmentOffset;
|
||||
}
|
||||
|
||||
/**
 * Stores the fragment offset, replacing any previous value. No validation is performed here.
 *
 * @param fragmentOffset the value to store
 */
public void fragmentOffset(int fragmentOffset) {
|
||||
this.fragmentOffset = fragmentOffset;
|
||||
}
|
||||
|
||||
/**
 * Returns the stored pre-tags array itself, not a copy.
 *
 * @return the pre-tags array as currently held
 */
public String[] preTags() {
|
||||
return preTags;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue