diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java b/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java deleted file mode 100644 index de1749ef1a0..00000000000 --- a/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.search.vectorhighlight; - -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.highlight.Encoder; - -import java.io.IOException; -import java.util.*; - -/** - * Abstract {@link FragmentsBuilder} implementation that detects whether highlight hits occurred on a field that is - * multivalued (Basically fields that have the same name) and splits the highlight snippets according to a single field - * boundary. This avoids that a highlight hit is shown as one hit whilst it is actually a hit on multiple fields. - */ -// LUCENE 4.1: once its out, remove this class and let subclasses extend BaseFragmentsBuilder instead. -public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder { - - private boolean discreteMultiValueHighlighting = true; - - protected AbstractFragmentsBuilder() { - super(); - } - - protected AbstractFragmentsBuilder(BoundaryScanner boundaryScanner) { - super(boundaryScanner); - } - - protected AbstractFragmentsBuilder(String[] preTags, String[] postTags) { - super(preTags, postTags); - } - - public AbstractFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs) { - super(preTags, postTags, bs); - } - - public void setDiscreteMultiValueHighlighting(boolean discreteMultiValueHighlighting) { - this.discreteMultiValueHighlighting = discreteMultiValueHighlighting; - } - - public String[] createFragments(IndexReader reader, int docId, - String fieldName, FieldFragList fieldFragList, int maxNumFragments, - String[] preTags, String[] postTags, Encoder encoder) throws IOException { - if (maxNumFragments < 0) { - throw new IllegalArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number."); - } - - List fragInfos = fieldFragList.getFragInfos(); - Field[] values = getFields(reader, docId, fieldName); - if (values.length == 0) { - return null; - } - - if (discreteMultiValueHighlighting && values.length > 1) { - fragInfos = discreteMultiValueHighlighting(fragInfos, values); - } - - fragInfos = getWeightedFragInfoList(fragInfos); - - int limitFragments = maxNumFragments < fragInfos.size() ? maxNumFragments : fragInfos.size(); - List fragments = new ArrayList(limitFragments); - - StringBuilder buffer = new StringBuilder(); - int[] nextValueIndex = {0}; - for (int n = 0; n < limitFragments; n++) { - FieldFragList.WeightedFragInfo fragInfo = fragInfos.get(n); - fragments.add(makeFragment(buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder)); - } - return fragments.toArray(new String[fragments.size()]); - } - - protected List discreteMultiValueHighlighting(List fragInfos, Field[] fields) { - Map> fieldNameToFragInfos = new HashMap>(); - for (Field field : fields) { - fieldNameToFragInfos.put(field.name(), new ArrayList()); - } - - fragInfos: - for (FieldFragList.WeightedFragInfo fragInfo : fragInfos) { - int fieldStart; - int fieldEnd = 0; - for (Field field : fields) { - if (field.stringValue().isEmpty()) { - fieldEnd++; - continue; - } - fieldStart = fieldEnd; - fieldEnd += field.stringValue().length() + 1; // + 1 for going to next field with same name. - - if (fragInfo.getStartOffset() >= fieldStart && fragInfo.getEndOffset() >= fieldStart && - fragInfo.getStartOffset() <= fieldEnd && fragInfo.getEndOffset() <= fieldEnd) { - fieldNameToFragInfos.get(field.name()).add(fragInfo); - continue fragInfos; - } - - if (fragInfo.getSubInfos().isEmpty()) { - continue fragInfos; - } - - FieldPhraseList.WeightedPhraseInfo.Toffs firstToffs = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0); - if (fragInfo.getStartOffset() >= fieldEnd || firstToffs.getStartOffset() >= fieldEnd) { - continue; - } - - int fragStart = fieldStart; - if (fragInfo.getStartOffset() > fieldStart && fragInfo.getStartOffset() < fieldEnd) { - fragStart = fragInfo.getStartOffset(); - } - - int fragEnd = fieldEnd; - if (fragInfo.getEndOffset() > fieldStart && fragInfo.getEndOffset() < fieldEnd) { - fragEnd = fragInfo.getEndOffset(); - } - - - List subInfos = new ArrayList(); - WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, fragInfo.getTotalBoost(), subInfos); - - Iterator subInfoIterator = fragInfo.getSubInfos().iterator(); - while (subInfoIterator.hasNext()) { - FieldFragList.WeightedFragInfo.SubInfo subInfo = subInfoIterator.next(); - List toffsList = new ArrayList(); - Iterator toffsIterator = subInfo.getTermsOffsets().iterator(); - while (toffsIterator.hasNext()) { - FieldPhraseList.WeightedPhraseInfo.Toffs toffs = toffsIterator.next(); - if (toffs.getStartOffset() >= fieldStart && toffs.getEndOffset() <= fieldEnd) { - toffsList.add(toffs); - toffsIterator.remove(); - } - } - if (!toffsList.isEmpty()) { - subInfos.add(new FieldFragList.WeightedFragInfo.SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum())); - } - - if (subInfo.getTermsOffsets().isEmpty()) { - subInfoIterator.remove(); - } - } - fieldNameToFragInfos.get(field.name()).add(weightedFragInfo); - } - } - - List result = new ArrayList(); - for (List weightedFragInfos : fieldNameToFragInfos.values()) { - result.addAll(weightedFragInfos); - } - Collections.sort(result, new Comparator() { - - public int compare(FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2) { - return info1.getStartOffset() - info2.getStartOffset(); - } - - }); - - return result; - } - - private static class WeightedFragInfo extends FieldFragList.WeightedFragInfo { - - private final static List EMPTY = Collections.emptyList(); - - private WeightedFragInfo(int startOffset, int endOffset, float totalBoost, List subInfos) { - super(startOffset, endOffset, subInfos, totalBoost); - } - } - -} diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/XScoreOrderFragmentsBuilder.java b/src/main/java/org/apache/lucene/search/vectorhighlight/XScoreOrderFragmentsBuilder.java index 9a3a33c472e..29284dc9473 100644 --- a/src/main/java/org/apache/lucene/search/vectorhighlight/XScoreOrderFragmentsBuilder.java +++ b/src/main/java/org/apache/lucene/search/vectorhighlight/XScoreOrderFragmentsBuilder.java @@ -26,13 +26,14 @@ import java.util.List; /** * An implementation of FragmentsBuilder that outputs score-order fragments. */ -public class XScoreOrderFragmentsBuilder extends AbstractFragmentsBuilder { +public class XScoreOrderFragmentsBuilder extends BaseFragmentsBuilder { /** * a constructor. */ public XScoreOrderFragmentsBuilder() { super(); + setDiscreteMultiValueHighlighting(true); } /** @@ -43,14 +44,17 @@ public class XScoreOrderFragmentsBuilder extends AbstractFragmentsBuilder { */ public XScoreOrderFragmentsBuilder(String[] preTags, String[] postTags) { super(preTags, postTags); + setDiscreteMultiValueHighlighting(true); } public XScoreOrderFragmentsBuilder(BoundaryScanner bs) { super(bs); + setDiscreteMultiValueHighlighting(true); } public XScoreOrderFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs) { super(preTags, postTags, bs); + setDiscreteMultiValueHighlighting(true); } /** diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/XSimpleFragmentsBuilder.java b/src/main/java/org/apache/lucene/search/vectorhighlight/XSimpleFragmentsBuilder.java index d8f133c14ef..b6836370afa 100644 --- a/src/main/java/org/apache/lucene/search/vectorhighlight/XSimpleFragmentsBuilder.java +++ b/src/main/java/org/apache/lucene/search/vectorhighlight/XSimpleFragmentsBuilder.java @@ -24,13 +24,14 @@ import java.util.List; /** * A simple implementation of FragmentsBuilder. */ -public class XSimpleFragmentsBuilder extends AbstractFragmentsBuilder { +public class XSimpleFragmentsBuilder extends BaseFragmentsBuilder { /** * a constructor. */ public XSimpleFragmentsBuilder() { super(); + setDiscreteMultiValueHighlighting(true); } /** @@ -41,14 +42,17 @@ public class XSimpleFragmentsBuilder extends AbstractFragmentsBuilder { */ public XSimpleFragmentsBuilder(String[] preTags, String[] postTags) { super(preTags, postTags); + setDiscreteMultiValueHighlighting(true); } public XSimpleFragmentsBuilder(BoundaryScanner bs) { super(bs); + setDiscreteMultiValueHighlighting(true); } public XSimpleFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs) { super(preTags, postTags, bs); + setDiscreteMultiValueHighlighting(true); } /** diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index 50d5fb039b1..6bef7751641 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -251,7 +251,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase { FieldQuery fieldQuery = null; if (entry == null) { FragListBuilder fragListBuilder; - AbstractFragmentsBuilder fragmentsBuilder; + BaseFragmentsBuilder fragmentsBuilder; BoundaryScanner boundaryScanner = SimpleBoundaryScanner2.DEFAULT; if (field.boundaryMaxScan() != SimpleBoundaryScanner2.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS) {