Removed AbstractFragmentsBuilder. Lucene's BaseFragmentsBuilder now has discrete multivalued highlighting and better support for requesting a large number of fragments.

This commit is contained in:
Martijn van Groningen 2013-01-24 22:15:07 +01:00
parent e56b279624
commit f974a17229
4 changed files with 11 additions and 188 deletions

View File

@ -1,185 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search.vectorhighlight;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.highlight.Encoder;
import java.io.IOException;
import java.util.*;
/**
 * Abstract {@link FragmentsBuilder} implementation that detects whether highlight hits occurred on a field that is
 * multivalued (that is, several fields sharing the same name) and splits the highlight snippets at the boundaries
 * of the individual values. This prevents a hit from being shown as a single snippet when it actually spans
 * several values.
 */
// LUCENE 4.1: once it's out, remove this class and let subclasses extend BaseFragmentsBuilder instead.
public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder {

    /**
     * Orders fragment infos by ascending start offset. Explicit comparisons are used instead of subtraction so
     * the result can never be distorted by integer overflow.
     */
    private static final Comparator<FieldFragList.WeightedFragInfo> BY_START_OFFSET =
            new Comparator<FieldFragList.WeightedFragInfo>() {
                public int compare(FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2) {
                    int start1 = info1.getStartOffset();
                    int start2 = info2.getStartOffset();
                    return start1 < start2 ? -1 : (start1 == start2 ? 0 : 1);
                }
            };

    /** Whether fragments are split per value of a multivalued field; enabled by default. */
    private boolean discreteMultiValueHighlighting = true;

    /** Creates a builder through the no-argument superclass constructor. */
    protected AbstractFragmentsBuilder() {
        super();
    }

    /**
     * Creates a builder, forwarding the boundary scanner to the superclass.
     *
     * @param boundaryScanner passed unchanged to the superclass constructor
     */
    protected AbstractFragmentsBuilder(BoundaryScanner boundaryScanner) {
        super(boundaryScanner);
    }

    /**
     * Creates a builder, forwarding the tag arrays to the superclass.
     *
     * @param preTags  passed unchanged to the superclass constructor
     * @param postTags passed unchanged to the superclass constructor
     */
    protected AbstractFragmentsBuilder(String[] preTags, String[] postTags) {
        super(preTags, postTags);
    }

    /**
     * Creates a builder, forwarding the tag arrays and the boundary scanner to the superclass.
     *
     * @param preTags  passed unchanged to the superclass constructor
     * @param postTags passed unchanged to the superclass constructor
     * @param bs       passed unchanged to the superclass constructor
     */
    public AbstractFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs) {
        super(preTags, postTags, bs);
    }

    /**
     * Enables or disables splitting of fragments along the value boundaries of a multivalued field.
     *
     * @param discreteMultiValueHighlighting {@code true} to split fragments per value, {@code false} to leave
     *                                       the fragment list untouched
     */
    public void setDiscreteMultiValueHighlighting(boolean discreteMultiValueHighlighting) {
        this.discreteMultiValueHighlighting = discreteMultiValueHighlighting;
    }

    /**
     * Builds at most {@code maxNumFragments} fragments for the given field of the given document.
     * <p>
     * When discrete multi-value highlighting is enabled and more than one value was loaded for the field, the
     * fragment infos are first split per value by
     * {@link #discreteMultiValueHighlighting(java.util.List, Field[])}.
     *
     * @param reader          reader handed to {@code getFields} to load the field values
     * @param docId           document whose field values are loaded
     * @param fieldName       name of the field to load
     * @param fieldFragList   source of the fragment infos
     * @param maxNumFragments upper bound on the number of fragments returned; must not be negative
     * @param preTags         handed to {@code makeFragment} for every fragment
     * @param postTags        handed to {@code makeFragment} for every fragment
     * @param encoder         handed to {@code makeFragment} for every fragment
     * @return the created fragments, or {@code null} when no field values were found
     * @throws IllegalArgumentException if {@code maxNumFragments} is negative
     * @throws IOException              declared for the field loading / fragment creation helpers
     */
    public String[] createFragments(IndexReader reader, int docId,
                                    String fieldName, FieldFragList fieldFragList, int maxNumFragments,
                                    String[] preTags, String[] postTags, Encoder encoder) throws IOException {
        // Only negative values are rejected; zero is accepted and results in an empty array (when values exist).
        if (maxNumFragments < 0) {
            throw new IllegalArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
        }

        List<FieldFragList.WeightedFragInfo> fragInfos = fieldFragList.getFragInfos();
        Field[] values = getFields(reader, docId, fieldName);
        if (values.length == 0) {
            return null;
        }

        // Splitting only makes sense when there is more than one value to split along.
        if (discreteMultiValueHighlighting && values.length > 1) {
            fragInfos = discreteMultiValueHighlighting(fragInfos, values);
        }
        fragInfos = getWeightedFragInfoList(fragInfos);

        int limitFragments = Math.min(maxNumFragments, fragInfos.size());
        List<String> fragments = new ArrayList<String>(limitFragments);
        // The buffer and the value index are shared across all makeFragment calls.
        StringBuilder buffer = new StringBuilder();
        int[] nextValueIndex = {0};
        for (int n = 0; n < limitFragments; n++) {
            FieldFragList.WeightedFragInfo fragInfo = fragInfos.get(n);
            fragments.add(makeFragment(buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder));
        }
        return fragments.toArray(new String[fragments.size()]);
    }

    /**
     * Splits fragment infos that span several values of a multivalued field into one fragment info per value.
     * <p>
     * Offsets are interpreted as if all values were concatenated: each non-empty value occupies its string
     * length plus one position, and each empty value occupies a single position. A fragment info lying
     * completely inside one value is kept as is. Otherwise, a new fragment info clipped to the value is created
     * for every value it overlaps, and the term offsets falling inside that value are moved into it.
     * <p>
     * Note that this method modifies its input: term offsets (and sub infos left without term offsets) are
     * removed from the sub infos of every fragment info that gets split. A fragment info without sub infos
     * that does not fit inside a single value is dropped.
     *
     * @param fragInfos fragment infos to split; their sub infos may be modified
     * @param fields    the values of the multivalued field, in offset order
     * @return the resulting fragment infos, sorted by ascending start offset
     */
    protected List<FieldFragList.WeightedFragInfo> discreteMultiValueHighlighting(List<FieldFragList.WeightedFragInfo> fragInfos, Field[] fields) {
        Map<String, List<FieldFragList.WeightedFragInfo>> fieldNameToFragInfos = new HashMap<String, List<FieldFragList.WeightedFragInfo>>();
        for (Field field : fields) {
            fieldNameToFragInfos.put(field.name(), new ArrayList<FieldFragList.WeightedFragInfo>());
        }

        nextFragInfo:
        for (FieldFragList.WeightedFragInfo fragInfo : fragInfos) {
            final int fragInfoStart = fragInfo.getStartOffset();
            final int fragInfoEnd = fragInfo.getEndOffset();
            int fieldStart;
            int fieldEnd = 0;
            for (Field field : fields) {
                String value = field.stringValue();
                if (value.isEmpty()) {
                    // An empty value still takes up one position in the offset space.
                    fieldEnd++;
                    continue;
                }
                fieldStart = fieldEnd;
                fieldEnd += value.length() + 1; // + 1 for going to next field with same name.

                // The fragment lies completely inside this value: keep it untouched.
                if (fragInfoStart >= fieldStart && fragInfoEnd >= fieldStart &&
                        fragInfoStart <= fieldEnd && fragInfoEnd <= fieldEnd) {
                    fieldNameToFragInfos.get(field.name()).add(fragInfo);
                    continue nextFragInfo;
                }

                // Nothing (left) to distribute over the values.
                if (fragInfo.getSubInfos().isEmpty()) {
                    continue nextFragInfo;
                }

                // The fragment, or its first remaining term offset, starts beyond this value: try the next one.
                FieldPhraseList.WeightedPhraseInfo.Toffs firstToffs = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0);
                if (fragInfoStart >= fieldEnd || firstToffs.getStartOffset() >= fieldEnd) {
                    continue;
                }

                // Clip the fragment boundaries to the boundaries of this value.
                int fragStart = fieldStart;
                if (fragInfoStart > fieldStart && fragInfoStart < fieldEnd) {
                    fragStart = fragInfoStart;
                }
                int fragEnd = fieldEnd;
                if (fragInfoEnd > fieldStart && fragInfoEnd < fieldEnd) {
                    fragEnd = fragInfoEnd;
                }

                List<FieldFragList.WeightedFragInfo.SubInfo> subInfos = new ArrayList<FieldFragList.WeightedFragInfo.SubInfo>();
                WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, fragInfo.getTotalBoost(), subInfos);

                // Move every term offset that falls inside this value from the original fragment to the new one.
                Iterator<FieldFragList.WeightedFragInfo.SubInfo> subInfoIterator = fragInfo.getSubInfos().iterator();
                while (subInfoIterator.hasNext()) {
                    FieldFragList.WeightedFragInfo.SubInfo subInfo = subInfoIterator.next();
                    List<FieldPhraseList.WeightedPhraseInfo.Toffs> toffsList = new ArrayList<FieldPhraseList.WeightedPhraseInfo.Toffs>();
                    Iterator<FieldPhraseList.WeightedPhraseInfo.Toffs> toffsIterator = subInfo.getTermsOffsets().iterator();
                    while (toffsIterator.hasNext()) {
                        FieldPhraseList.WeightedPhraseInfo.Toffs toffs = toffsIterator.next();
                        if (toffs.getStartOffset() >= fieldStart && toffs.getEndOffset() <= fieldEnd) {
                            toffsList.add(toffs);
                            toffsIterator.remove();
                        }
                    }
                    if (!toffsList.isEmpty()) {
                        subInfos.add(new FieldFragList.WeightedFragInfo.SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum()));
                    }
                    // A sub info whose term offsets were all moved is fully consumed.
                    if (subInfo.getTermsOffsets().isEmpty()) {
                        subInfoIterator.remove();
                    }
                }
                fieldNameToFragInfos.get(field.name()).add(weightedFragInfo);
            }
        }

        List<FieldFragList.WeightedFragInfo> result = new ArrayList<FieldFragList.WeightedFragInfo>();
        for (List<FieldFragList.WeightedFragInfo> weightedFragInfos : fieldNameToFragInfos.values()) {
            result.addAll(weightedFragInfos);
        }
        Collections.sort(result, BY_START_OFFSET);
        return result;
    }

    /**
     * Fragment info created for the part of a split fragment that belongs to a single value. Only adapts the
     * argument order to the superclass constructor.
     */
    private static class WeightedFragInfo extends FieldFragList.WeightedFragInfo {

        private WeightedFragInfo(int startOffset, int endOffset, float totalBoost, List<FieldFragList.WeightedFragInfo.SubInfo> subInfos) {
            super(startOffset, endOffset, subInfos, totalBoost);
        }
    }
}

View File

@ -26,13 +26,14 @@ import java.util.List;
/**
* An implementation of FragmentsBuilder that outputs score-order fragments.
*/
public class XScoreOrderFragmentsBuilder extends AbstractFragmentsBuilder {
public class XScoreOrderFragmentsBuilder extends BaseFragmentsBuilder {
/**
 * Creates a builder through the no-argument superclass constructor and then
 * switches on discrete multi-value highlighting.
 */
public XScoreOrderFragmentsBuilder() {
super();
setDiscreteMultiValueHighlighting(true);
}
/**
@ -43,14 +44,17 @@ public class XScoreOrderFragmentsBuilder extends AbstractFragmentsBuilder {
*/
public XScoreOrderFragmentsBuilder(String[] preTags, String[] postTags) {
// Both tag arrays are forwarded unchanged to the superclass constructor.
super(preTags, postTags);
// Discrete multi-value highlighting is always switched on for this builder.
setDiscreteMultiValueHighlighting(true);
}
/**
 * Creates a builder with the given boundary scanner and switches on discrete
 * multi-value highlighting.
 *
 * @param bs forwarded unchanged to the superclass constructor
 */
public XScoreOrderFragmentsBuilder(BoundaryScanner bs) {
super(bs);
setDiscreteMultiValueHighlighting(true);
}
/**
 * Creates a builder with the given tags and boundary scanner and switches on
 * discrete multi-value highlighting.
 *
 * @param preTags  forwarded unchanged to the superclass constructor
 * @param postTags forwarded unchanged to the superclass constructor
 * @param bs       forwarded unchanged to the superclass constructor
 */
public XScoreOrderFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs) {
super(preTags, postTags, bs);
setDiscreteMultiValueHighlighting(true);
}
/**

View File

@ -24,13 +24,14 @@ import java.util.List;
/**
* A simple implementation of FragmentsBuilder.
*/
public class XSimpleFragmentsBuilder extends AbstractFragmentsBuilder {
public class XSimpleFragmentsBuilder extends BaseFragmentsBuilder {
/**
 * Creates a builder through the no-argument superclass constructor and then
 * switches on discrete multi-value highlighting.
 */
public XSimpleFragmentsBuilder() {
super();
setDiscreteMultiValueHighlighting(true);
}
/**
@ -41,14 +42,17 @@ public class XSimpleFragmentsBuilder extends AbstractFragmentsBuilder {
*/
public XSimpleFragmentsBuilder(String[] preTags, String[] postTags) {
// Both tag arrays are forwarded unchanged to the superclass constructor.
super(preTags, postTags);
// Discrete multi-value highlighting is always switched on for this builder.
setDiscreteMultiValueHighlighting(true);
}
/**
 * Creates a builder with the given boundary scanner and switches on discrete
 * multi-value highlighting.
 *
 * @param bs forwarded unchanged to the superclass constructor
 */
public XSimpleFragmentsBuilder(BoundaryScanner bs) {
super(bs);
setDiscreteMultiValueHighlighting(true);
}
/**
 * Creates a builder with the given tags and boundary scanner and switches on
 * discrete multi-value highlighting.
 *
 * @param preTags  forwarded unchanged to the superclass constructor
 * @param postTags forwarded unchanged to the superclass constructor
 * @param bs       forwarded unchanged to the superclass constructor
 */
public XSimpleFragmentsBuilder(String[] preTags, String[] postTags, BoundaryScanner bs) {
super(preTags, postTags, bs);
setDiscreteMultiValueHighlighting(true);
}
/**

View File

@ -251,7 +251,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
FieldQuery fieldQuery = null;
if (entry == null) {
FragListBuilder fragListBuilder;
AbstractFragmentsBuilder fragmentsBuilder;
BaseFragmentsBuilder fragmentsBuilder;
BoundaryScanner boundaryScanner = SimpleBoundaryScanner2.DEFAULT;
if (field.boundaryMaxScan() != SimpleBoundaryScanner2.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS) {