From cee29dedb2490c18540d4b6ec9cd3c5a06789657 Mon Sep 17 00:00:00 2001 From: Jonathan Evans Date: Tue, 5 Jul 2011 10:04:18 +0100 Subject: [PATCH] Added highlight offset for highlighted terms in fragments --- .../MarginFragListBuilder.java | 96 +++++++++++++++++++ .../search/highlight/HighlightPhase.java | 7 +- .../highlight/HighlighterParseElement.java | 2 + .../highlight/SearchContextHighlight.java | 10 ++ 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/MarginFragListBuilder.java diff --git a/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/MarginFragListBuilder.java b/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/MarginFragListBuilder.java new file mode 100644 index 00000000000..eeb57975742 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/MarginFragListBuilder.java @@ -0,0 +1,96 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.lucene.search.vectorhighlight; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo; + +/** + * A {@link FragListBuilder} that starts each fragment up to {@code margin} characters before its first phrase (never before the end of the previous fragment); {@code fragCharSize} must be at least {@code 3 * margin}. + */ +public class MarginFragListBuilder implements FragListBuilder { + + private static final int DEFAULT_MARGIN = 6; + private final int margin; + private final int minFragCharSize; + + public MarginFragListBuilder() { + this(DEFAULT_MARGIN); + } + + public MarginFragListBuilder(int startMargin) { + margin = startMargin; + minFragCharSize = 3*margin; + } + + public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize) { + if( fragCharSize < minFragCharSize ) + throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + + minFragCharSize + " or higher." ); + + FieldFragList ffl = new FieldFragList( fragCharSize ); + + List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>(); + Iterator<WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator(); + WeightedPhraseInfo phraseInfo = null; + int startOffset = 0; + boolean taken = false; + while( true ){ + if( !taken ){ + if( !ite.hasNext() ) break; + phraseInfo = ite.next(); + } + taken = false; + if( phraseInfo == null ) break; + + // a phrase that starts before the end of the previous fragment cannot open a new one: discard it and try the next phrase + if( phraseInfo.getStartOffset() < startOffset ) continue; + + wpil.clear(); + wpil.add( phraseInfo ); + int st = phraseInfo.getStartOffset() - margin < startOffset ? 
+ startOffset : phraseInfo.getStartOffset() - margin; + int en = st + fragCharSize; + if( phraseInfo.getEndOffset() > en ) + en = phraseInfo.getEndOffset(); + startOffset = en; + + while( true ){ + if( ite.hasNext() ){ + phraseInfo = ite.next(); + taken = true; + if( phraseInfo == null ) break; + } + else + break; + if( phraseInfo.getEndOffset() <= en ) + wpil.add( phraseInfo ); + else + break; + } + ffl.add( st, en, wpil ); + } + return ffl; + } + +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index c726d6588de..0b6f6494741 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -223,13 +223,18 @@ public class HighlightPhase implements SearchHitPhase { FragmentsBuilder fragmentsBuilder; if (field.numberOfFragments() == 0) { fragListBuilder = new SingleFragListBuilder(); + if (fieldMapper.stored()) { fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags()); } else { fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags()); } } else { - fragListBuilder = new SimpleFragListBuilder(); + if(field.fragmentOffset() == -1) + fragListBuilder = new SimpleFragListBuilder(); + else + fragListBuilder = new MarginFragListBuilder(field.fragmentOffset()); + if (field.scoreOrdered()) { if (fieldMapper.stored()) { fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags()); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java index 55d4747bc5c..4725a20b364 100644 --- 
a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java @@ -137,6 +137,8 @@ public class HighlighterParseElement implements SearchParseElement { field.fragmentCharSize(parser.intValue()); } else if ("number_of_fragments".equals(fieldName) || "numberOfFragments".equals(fieldName)) { field.numberOfFragments(parser.intValue()); + } else if ("fragment_offset".equals(fieldName) || "fragmentOffset".equals(fieldName)) { + field.fragmentOffset(parser.intValue()); } else if ("highlight_filter".equals(fieldName) || "highlightFilter".equals(fieldName)) { field.highlightFilter(parser.booleanValue()); } else if ("score".equals(fieldName)) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java index 6d8c95f2138..e78ac5c0be1 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java @@ -44,6 +44,8 @@ public class SearchContextHighlight { private int numberOfFragments = -1; + private int fragmentOffset = -1; + private String[] preTags; private String[] postTags; @@ -76,6 +78,14 @@ public class SearchContextHighlight { this.numberOfFragments = numberOfFragments; } + /** Returns the fragment offset; the field starts at -1, in which case HighlightPhase keeps SimpleFragListBuilder, and any other value is handed to MarginFragListBuilder as its margin. */ public int fragmentOffset() { + return fragmentOffset; + } + + /** Stores the fragment offset parsed from the "fragment_offset" / "fragmentOffset" request key; the value is not validated here. */ public void fragmentOffset(int fragmentOffset) { + this.fragmentOffset = fragmentOffset; + } + public String[] preTags() { return preTags; }