Revert "Add FastVectorHighlighter support for more complex queries."
This reverts commit e943cc81a5.
The support for more complex queries causes StackOverflowErrors that
can dramatically degrade cluster performance and stability.
This commit backs out that change to reduce the risk of deep
call stacks.
Reverts #3357
This commit is contained in:
parent
623e340d4f
commit
1b756ba23a
|
@ -40,7 +40,7 @@ import java.util.List;
|
|||
*
|
||||
*/
|
||||
// LUCENE MONITOR
|
||||
public class CustomFieldQuery extends XFieldQuery {
|
||||
public class CustomFieldQuery extends FieldQuery {
|
||||
|
||||
private static Field multiTermQueryWrapperFilterQueryField;
|
||||
|
||||
|
@ -55,7 +55,7 @@ public class CustomFieldQuery extends XFieldQuery {
|
|||
|
||||
public static final ThreadLocal<Boolean> highlightFilters = new ThreadLocal<Boolean>();
|
||||
|
||||
public CustomFieldQuery(Query query, IndexReader reader, XFastVectorHighlighter highlighter) throws IOException {
|
||||
public CustomFieldQuery(Query query, IndexReader reader, FastVectorHighlighter highlighter) throws IOException {
|
||||
this(query, reader, highlighter.isPhraseHighlight(), highlighter.isFieldMatch());
|
||||
}
|
||||
|
||||
|
|
|
@ -1,144 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldPhraseList.WeightedPhraseInfo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A abstract implementation of {@link XFragListBuilder}.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public abstract class XBaseFragListBuilder implements XFragListBuilder {
|
||||
|
||||
public static final int MARGIN_DEFAULT = 6;
|
||||
public static final int MIN_FRAG_CHAR_SIZE_FACTOR = 3;
|
||||
|
||||
final int margin;
|
||||
final int minFragCharSize;
|
||||
|
||||
public XBaseFragListBuilder( int margin ){
|
||||
if( margin < 0 )
|
||||
throw new IllegalArgumentException( "margin(" + margin + ") is too small. It must be 0 or higher." );
|
||||
|
||||
this.margin = margin;
|
||||
this.minFragCharSize = Math.max( 1, margin * MIN_FRAG_CHAR_SIZE_FACTOR );
|
||||
}
|
||||
|
||||
public XBaseFragListBuilder(){
|
||||
this( MARGIN_DEFAULT );
|
||||
}
|
||||
|
||||
protected XFieldFragList createFieldFragList( XFieldPhraseList fieldPhraseList, XFieldFragList fieldFragList, int fragCharSize ){
|
||||
if( fragCharSize < minFragCharSize )
|
||||
throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );
|
||||
|
||||
List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
|
||||
IteratorQueue<WeightedPhraseInfo> queue = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.getPhraseList().iterator());
|
||||
WeightedPhraseInfo phraseInfo = null;
|
||||
int startOffset = 0;
|
||||
while((phraseInfo = queue.top()) != null){
|
||||
// if the phrase violates the border of previous fragment, discard it and try next phrase
|
||||
if( phraseInfo.getStartOffset() < startOffset ) {
|
||||
queue.removeTop();
|
||||
continue;
|
||||
}
|
||||
|
||||
wpil.clear();
|
||||
final int currentPhraseStartOffset = phraseInfo.getStartOffset();
|
||||
int currentPhraseEndOffset = phraseInfo.getEndOffset();
|
||||
int spanStart = Math.max(currentPhraseStartOffset - margin, startOffset);
|
||||
int spanEnd = Math.max(currentPhraseEndOffset, spanStart + fragCharSize);
|
||||
if (acceptPhrase(queue.removeTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) {
|
||||
wpil.add(phraseInfo);
|
||||
}
|
||||
while((phraseInfo = queue.top()) != null) { // pull until we crossed the current spanEnd
|
||||
if (phraseInfo.getEndOffset() <= spanEnd) {
|
||||
currentPhraseEndOffset = phraseInfo.getEndOffset();
|
||||
if (acceptPhrase(queue.removeTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) {
|
||||
wpil.add(phraseInfo);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (wpil.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
|
||||
// now recalculate the start and end position to "center" the result
|
||||
final int newMargin = Math.max(0, (fragCharSize-matchLen)/2); // matchLen can be > fragCharSize prevent IAOOB here
|
||||
spanStart = currentPhraseStartOffset - newMargin;
|
||||
if (spanStart < startOffset) {
|
||||
spanStart = startOffset;
|
||||
}
|
||||
// whatever is bigger here we grow this out
|
||||
spanEnd = spanStart + Math.max(matchLen, fragCharSize);
|
||||
startOffset = spanEnd;
|
||||
fieldFragList.add(spanStart, spanEnd, wpil);
|
||||
}
|
||||
return fieldFragList;
|
||||
}
|
||||
|
||||
/**
|
||||
* A predicate to decide if the given {@link WeightedPhraseInfo} should be
|
||||
* accepted as a highlighted phrase or if it should be discarded.
|
||||
* <p>
|
||||
* The default implementation discards phrases that are composed of more than one term
|
||||
* and where the matchLength exceeds the fragment character size.
|
||||
*
|
||||
* @param info the phrase info to accept
|
||||
* @param matchLength the match length of the current phrase
|
||||
* @param fragCharSize the configured fragment character size
|
||||
* @return <code>true</code> if this phrase info should be accepted as a highligh phrase
|
||||
*/
|
||||
protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
|
||||
return info.getTermsOffsets().size() <= 1 || matchLength <= fragCharSize;
|
||||
}
|
||||
|
||||
private static final class IteratorQueue<T> {
|
||||
private final Iterator<T> iter;
|
||||
private T top;
|
||||
|
||||
public IteratorQueue(Iterator<T> iter) {
|
||||
this.iter = iter;
|
||||
T removeTop = removeTop();
|
||||
assert removeTop == null;
|
||||
}
|
||||
|
||||
public T top() {
|
||||
return top;
|
||||
}
|
||||
|
||||
public T removeTop() {
|
||||
T currentTop = top;
|
||||
if (iter.hasNext()) {
|
||||
top = iter.next();
|
||||
} else {
|
||||
top = null;
|
||||
}
|
||||
return currentTop;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1,332 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.search.highlight.DefaultEncoder;
|
||||
import org.apache.lucene.search.highlight.Encoder;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo.SubInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldPhraseList.WeightedPhraseInfo.Toffs;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Base FragmentsBuilder implementation that supports colored pre/post
|
||||
* tags and multivalued fields.
|
||||
* <p>
|
||||
* Uses {@link XBoundaryScanner} to determine fragments.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public abstract class XBaseFragmentsBuilder implements XFragmentsBuilder {
|
||||
|
||||
protected String[] preTags, postTags;
|
||||
public static final String[] COLORED_PRE_TAGS = {
|
||||
"<b style=\"background:yellow\">", "<b style=\"background:lawngreen\">", "<b style=\"background:aquamarine\">",
|
||||
"<b style=\"background:magenta\">", "<b style=\"background:palegreen\">", "<b style=\"background:coral\">",
|
||||
"<b style=\"background:wheat\">", "<b style=\"background:khaki\">", "<b style=\"background:lime\">",
|
||||
"<b style=\"background:deepskyblue\">", "<b style=\"background:deeppink\">", "<b style=\"background:salmon\">",
|
||||
"<b style=\"background:peachpuff\">", "<b style=\"background:violet\">", "<b style=\"background:mediumpurple\">",
|
||||
"<b style=\"background:palegoldenrod\">", "<b style=\"background:darkkhaki\">", "<b style=\"background:springgreen\">",
|
||||
"<b style=\"background:turquoise\">", "<b style=\"background:powderblue\">"
|
||||
};
|
||||
public static final String[] COLORED_POST_TAGS = { "</b>" };
|
||||
private char multiValuedSeparator = ' ';
|
||||
private final BoundaryScanner boundaryScanner;
|
||||
private boolean discreteMultiValueHighlighting = false;
|
||||
|
||||
protected XBaseFragmentsBuilder(){
|
||||
this( new String[]{ "<b>" }, new String[]{ "</b>" } );
|
||||
}
|
||||
|
||||
protected XBaseFragmentsBuilder( String[] preTags, String[] postTags ){
|
||||
this(preTags, postTags, new SimpleBoundaryScanner());
|
||||
}
|
||||
|
||||
protected XBaseFragmentsBuilder(BoundaryScanner boundaryScanner){
|
||||
this( new String[]{ "<b>" }, new String[]{ "</b>" }, boundaryScanner );
|
||||
}
|
||||
|
||||
protected XBaseFragmentsBuilder( String[] preTags, String[] postTags, BoundaryScanner boundaryScanner ){
|
||||
this.preTags = preTags;
|
||||
this.postTags = postTags;
|
||||
this.boundaryScanner = boundaryScanner;
|
||||
}
|
||||
|
||||
static Object checkTagsArgument( Object tags ){
|
||||
if( tags instanceof String ) return tags;
|
||||
else if( tags instanceof String[] ) return tags;
|
||||
throw new IllegalArgumentException( "type of preTags/postTags must be a String or String[]" );
|
||||
}
|
||||
|
||||
public abstract List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src );
|
||||
|
||||
private static final Encoder NULL_ENCODER = new DefaultEncoder();
|
||||
|
||||
@Override
|
||||
public String createFragment( IndexReader reader, int docId,
|
||||
String fieldName, XFieldFragList fieldFragList ) throws IOException {
|
||||
return createFragment( reader, docId, fieldName, fieldFragList,
|
||||
preTags, postTags, NULL_ENCODER );
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] createFragments( IndexReader reader, int docId,
|
||||
String fieldName, XFieldFragList fieldFragList, int maxNumFragments )
|
||||
throws IOException {
|
||||
return createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments,
|
||||
preTags, postTags, NULL_ENCODER );
|
||||
}
|
||||
|
||||
@Override
|
||||
public String createFragment( IndexReader reader, int docId,
|
||||
String fieldName, XFieldFragList fieldFragList, String[] preTags, String[] postTags,
|
||||
Encoder encoder ) throws IOException {
|
||||
String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1,
|
||||
preTags, postTags, encoder );
|
||||
if( fragments == null || fragments.length == 0 ) return null;
|
||||
return fragments[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] createFragments( IndexReader reader, int docId,
|
||||
String fieldName, XFieldFragList fieldFragList, int maxNumFragments,
|
||||
String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
|
||||
|
||||
if( maxNumFragments < 0 ) {
|
||||
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
|
||||
}
|
||||
|
||||
List<WeightedFragInfo> fragInfos = fieldFragList.getFragInfos();
|
||||
Field[] values = getFields( reader, docId, fieldName );
|
||||
if( values.length == 0 ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (discreteMultiValueHighlighting && values.length > 1) {
|
||||
fragInfos = discreteMultiValueHighlighting(fragInfos, values);
|
||||
}
|
||||
|
||||
fragInfos = getWeightedFragInfoList(fragInfos);
|
||||
int limitFragments = maxNumFragments < fragInfos.size() ? maxNumFragments : fragInfos.size();
|
||||
List<String> fragments = new ArrayList<String>( limitFragments );
|
||||
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
int[] nextValueIndex = { 0 };
|
||||
for( int n = 0; n < limitFragments; n++ ){
|
||||
WeightedFragInfo fragInfo = fragInfos.get( n );
|
||||
fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder ) );
|
||||
}
|
||||
return fragments.toArray( new String[fragments.size()] );
|
||||
}
|
||||
|
||||
protected Field[] getFields( IndexReader reader, int docId, final String fieldName) throws IOException {
|
||||
// according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
|
||||
final List<Field> fields = new ArrayList<Field>();
|
||||
reader.document(docId, new StoredFieldVisitor() {
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, String value) {
|
||||
FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||
fields.add(new Field(fieldInfo.name, value, ft));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status needsField(FieldInfo fieldInfo) {
|
||||
return fieldInfo.name.equals(fieldName) ? Status.YES : Status.NO;
|
||||
}
|
||||
});
|
||||
return fields.toArray(new Field[fields.size()]);
|
||||
}
|
||||
|
||||
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
|
||||
String[] preTags, String[] postTags, Encoder encoder ){
|
||||
StringBuilder fragment = new StringBuilder();
|
||||
final int s = fragInfo.getStartOffset();
|
||||
int[] modifiedStartOffset = { s };
|
||||
String src = getFragmentSourceMSO( buffer, index, values, s, fragInfo.getEndOffset(), modifiedStartOffset );
|
||||
int srcIndex = 0;
|
||||
for( SubInfo subInfo : fragInfo.getSubInfos() ){
|
||||
for( Toffs to : subInfo.getTermsOffsets() ){
|
||||
fragment
|
||||
.append( encoder.encodeText( src.substring( srcIndex, to.getStartOffset() - modifiedStartOffset[0] ) ) )
|
||||
.append( getPreTag( preTags, subInfo.getSeqnum() ) )
|
||||
.append( encoder.encodeText( src.substring( to.getStartOffset() - modifiedStartOffset[0], to.getEndOffset() - modifiedStartOffset[0] ) ) )
|
||||
.append( getPostTag( postTags, subInfo.getSeqnum() ) );
|
||||
srcIndex = to.getEndOffset() - modifiedStartOffset[0];
|
||||
}
|
||||
}
|
||||
fragment.append( encoder.encodeText( src.substring( srcIndex ) ) );
|
||||
return fragment.toString();
|
||||
}
|
||||
|
||||
protected String getFragmentSourceMSO( StringBuilder buffer, int[] index, Field[] values,
|
||||
int startOffset, int endOffset, int[] modifiedStartOffset ){
|
||||
while( buffer.length() < endOffset && index[0] < values.length ){
|
||||
buffer.append( values[index[0]++].stringValue() );
|
||||
buffer.append( getMultiValuedSeparator() );
|
||||
}
|
||||
int bufferLength = buffer.length();
|
||||
// we added the multi value char to the last buffer, ignore it
|
||||
if (values[index[0] - 1].fieldType().tokenized()) {
|
||||
bufferLength--;
|
||||
}
|
||||
int eo = bufferLength < endOffset ? bufferLength : boundaryScanner.findEndOffset( buffer, endOffset );
|
||||
modifiedStartOffset[0] = boundaryScanner.findStartOffset( buffer, startOffset );
|
||||
return buffer.substring( modifiedStartOffset[0], eo );
|
||||
}
|
||||
|
||||
protected String getFragmentSource( StringBuilder buffer, int[] index, Field[] values,
|
||||
int startOffset, int endOffset ){
|
||||
while( buffer.length() < endOffset && index[0] < values.length ){
|
||||
buffer.append( values[index[0]].stringValue() );
|
||||
buffer.append( multiValuedSeparator );
|
||||
index[0]++;
|
||||
}
|
||||
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
|
||||
return buffer.substring( startOffset, eo );
|
||||
}
|
||||
|
||||
protected List<WeightedFragInfo> discreteMultiValueHighlighting(List<WeightedFragInfo> fragInfos, Field[] fields) {
|
||||
Map<String, List<WeightedFragInfo>> fieldNameToFragInfos = new HashMap<String, List<WeightedFragInfo>>();
|
||||
for (Field field : fields) {
|
||||
fieldNameToFragInfos.put(field.name(), new ArrayList<WeightedFragInfo>());
|
||||
}
|
||||
|
||||
fragInfos: for (WeightedFragInfo fragInfo : fragInfos) {
|
||||
int fieldStart;
|
||||
int fieldEnd = 0;
|
||||
for (Field field : fields) {
|
||||
if (field.stringValue().isEmpty()) {
|
||||
fieldEnd++;
|
||||
continue;
|
||||
}
|
||||
fieldStart = fieldEnd;
|
||||
fieldEnd += field.stringValue().length() + 1; // + 1 for going to next field with same name.
|
||||
|
||||
if (fragInfo.getStartOffset() >= fieldStart && fragInfo.getEndOffset() >= fieldStart &&
|
||||
fragInfo.getStartOffset() <= fieldEnd && fragInfo.getEndOffset() <= fieldEnd) {
|
||||
fieldNameToFragInfos.get(field.name()).add(fragInfo);
|
||||
continue fragInfos;
|
||||
}
|
||||
|
||||
if (fragInfo.getSubInfos().isEmpty()) {
|
||||
continue fragInfos;
|
||||
}
|
||||
|
||||
Toffs firstToffs = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0);
|
||||
if (fragInfo.getStartOffset() >= fieldEnd || firstToffs.getStartOffset() >= fieldEnd) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int fragStart = fieldStart;
|
||||
if (fragInfo.getStartOffset() > fieldStart && fragInfo.getStartOffset() < fieldEnd) {
|
||||
fragStart = fragInfo.getStartOffset();
|
||||
}
|
||||
|
||||
int fragEnd = fieldEnd;
|
||||
if (fragInfo.getEndOffset() > fieldStart && fragInfo.getEndOffset() < fieldEnd) {
|
||||
fragEnd = fragInfo.getEndOffset();
|
||||
}
|
||||
|
||||
|
||||
List<SubInfo> subInfos = new ArrayList<SubInfo>();
|
||||
WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, subInfos, fragInfo.getTotalBoost());
|
||||
|
||||
Iterator<SubInfo> subInfoIterator = fragInfo.getSubInfos().iterator();
|
||||
while (subInfoIterator.hasNext()) {
|
||||
SubInfo subInfo = subInfoIterator.next();
|
||||
List<Toffs> toffsList = new ArrayList<Toffs>();
|
||||
Iterator<Toffs> toffsIterator = subInfo.getTermsOffsets().iterator();
|
||||
while (toffsIterator.hasNext()) {
|
||||
Toffs toffs = toffsIterator.next();
|
||||
if (toffs.getStartOffset() >= fieldStart && toffs.getEndOffset() <= fieldEnd) {
|
||||
toffsList.add(toffs);
|
||||
toffsIterator.remove();
|
||||
}
|
||||
}
|
||||
if (!toffsList.isEmpty()) {
|
||||
subInfos.add(new SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum()));
|
||||
}
|
||||
|
||||
if (subInfo.getTermsOffsets().isEmpty()) {
|
||||
subInfoIterator.remove();
|
||||
}
|
||||
}
|
||||
fieldNameToFragInfos.get(field.name()).add(weightedFragInfo);
|
||||
}
|
||||
}
|
||||
|
||||
List<WeightedFragInfo> result = new ArrayList<WeightedFragInfo>();
|
||||
for (List<WeightedFragInfo> weightedFragInfos : fieldNameToFragInfos.values()) {
|
||||
result.addAll(weightedFragInfos);
|
||||
}
|
||||
CollectionUtil.timSort(result, new Comparator<WeightedFragInfo>() {
|
||||
|
||||
@Override
|
||||
public int compare(XFieldFragList.WeightedFragInfo info1, XFieldFragList.WeightedFragInfo info2) {
|
||||
return info1.getStartOffset() - info2.getStartOffset();
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public void setMultiValuedSeparator( char separator ){
|
||||
multiValuedSeparator = separator;
|
||||
}
|
||||
|
||||
public char getMultiValuedSeparator(){
|
||||
return multiValuedSeparator;
|
||||
}
|
||||
|
||||
public boolean isDiscreteMultiValueHighlighting() {
|
||||
return discreteMultiValueHighlighting;
|
||||
}
|
||||
|
||||
public void setDiscreteMultiValueHighlighting(boolean discreteMultiValueHighlighting) {
|
||||
this.discreteMultiValueHighlighting = discreteMultiValueHighlighting;
|
||||
}
|
||||
|
||||
protected String getPreTag( int num ){
|
||||
return getPreTag( preTags, num );
|
||||
}
|
||||
|
||||
protected String getPostTag( int num ){
|
||||
return getPostTag( postTags, num );
|
||||
}
|
||||
|
||||
protected String getPreTag( String[] preTags, int num ){
|
||||
int n = num % preTags.length;
|
||||
return preTags[n];
|
||||
}
|
||||
|
||||
protected String getPostTag( String[] postTags, int num ){
|
||||
int n = num % postTags.length;
|
||||
return postTags[n];
|
||||
}
|
||||
}
|
|
@ -1,223 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.highlight.Encoder;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Another highlighter implementation.
|
||||
*
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XFastVectorHighlighter {
|
||||
|
||||
public static final boolean DEFAULT_PHRASE_HIGHLIGHT = true;
|
||||
public static final boolean DEFAULT_FIELD_MATCH = true;
|
||||
private final boolean phraseHighlight;
|
||||
private final boolean fieldMatch;
|
||||
private final XFragListBuilder fragListBuilder;
|
||||
private final XFragmentsBuilder fragmentsBuilder;
|
||||
private int phraseLimit = Integer.MAX_VALUE;
|
||||
|
||||
/**
|
||||
* the default constructor.
|
||||
*/
|
||||
public XFastVectorHighlighter(){
|
||||
this( DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH );
|
||||
}
|
||||
|
||||
/**
|
||||
* a constructor. Using {@link XSimpleFragListBuilder} and {@link XScoreOrderFragmentsBuilder}.
|
||||
*
|
||||
* @param phraseHighlight true or false for phrase highlighting
|
||||
* @param fieldMatch true of false for field matching
|
||||
*/
|
||||
public XFastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch ){
|
||||
this( phraseHighlight, fieldMatch, new XSimpleFragListBuilder(), new XScoreOrderFragmentsBuilder() );
|
||||
}
|
||||
|
||||
/**
|
||||
* a constructor. A {@link XFragListBuilder} and a {@link XFragmentsBuilder} can be specified (plugins).
|
||||
*
|
||||
* @param phraseHighlight true of false for phrase highlighting
|
||||
* @param fieldMatch true of false for field matching
|
||||
* @param fragListBuilder an instance of {@link XFragListBuilder}
|
||||
* @param fragmentsBuilder an instance of {@link XFragmentsBuilder}
|
||||
*/
|
||||
public XFastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch,
|
||||
XFragListBuilder fragListBuilder, XFragmentsBuilder fragmentsBuilder ){
|
||||
this.phraseHighlight = phraseHighlight;
|
||||
this.fieldMatch = fieldMatch;
|
||||
this.fragListBuilder = fragListBuilder;
|
||||
this.fragmentsBuilder = fragmentsBuilder;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a {@link XFieldQuery} object.
|
||||
*
|
||||
* @param query a query
|
||||
* @return the created {@link XFieldQuery} object
|
||||
*/
|
||||
public XFieldQuery getFieldQuery( Query query ) {
|
||||
// TODO: should we deprecate this?
|
||||
// because if there is no reader, then we cannot rewrite MTQ.
|
||||
try {
|
||||
return new XFieldQuery( query, null, phraseHighlight, fieldMatch );
|
||||
} catch (IOException e) {
|
||||
// should never be thrown when reader is null
|
||||
throw new RuntimeException (e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create a {@link XFieldQuery} object.
|
||||
*
|
||||
* @param query a query
|
||||
* @return the created {@link XFieldQuery} object
|
||||
*/
|
||||
public XFieldQuery getFieldQuery( Query query, IndexReader reader ) throws IOException {
|
||||
return new XFieldQuery( query, reader, phraseHighlight, fieldMatch );
|
||||
}
|
||||
|
||||
/**
|
||||
* return the best fragment.
|
||||
*
|
||||
* @param fieldQuery {@link XFieldQuery} object
|
||||
* @param reader {@link IndexReader} of the index
|
||||
* @param docId document id to be highlighted
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fragCharSize the length (number of chars) of a fragment
|
||||
* @return the best fragment (snippet) string
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public final String getBestFragment( final XFieldQuery fieldQuery, IndexReader reader, int docId,
|
||||
String fieldName, int fragCharSize ) throws IOException {
|
||||
XFieldFragList fieldFragList =
|
||||
getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
|
||||
return fragmentsBuilder.createFragment( reader, docId, fieldName, fieldFragList );
|
||||
}
|
||||
|
||||
/**
|
||||
* return the best fragments.
|
||||
*
|
||||
* @param fieldQuery {@link XFieldQuery} object
|
||||
* @param reader {@link IndexReader} of the index
|
||||
* @param docId document id to be highlighted
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fragCharSize the length (number of chars) of a fragment
|
||||
* @param maxNumFragments maximum number of fragments
|
||||
* @return created fragments or null when no fragments created.
|
||||
* size of the array can be less than maxNumFragments
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public final String[] getBestFragments( final XFieldQuery fieldQuery, IndexReader reader, int docId,
|
||||
String fieldName, int fragCharSize, int maxNumFragments ) throws IOException {
|
||||
XFieldFragList fieldFragList =
|
||||
getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
|
||||
return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments );
|
||||
}
|
||||
|
||||
/**
|
||||
* return the best fragment.
|
||||
*
|
||||
* @param fieldQuery {@link XFieldQuery} object
|
||||
* @param reader {@link IndexReader} of the index
|
||||
* @param docId document id to be highlighted
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fragCharSize the length (number of chars) of a fragment
|
||||
* @param fragListBuilder {@link XFragListBuilder} object
|
||||
* @param fragmentsBuilder {@link XFragmentsBuilder} object
|
||||
* @param preTags pre-tags to be used to highlight terms
|
||||
* @param postTags post-tags to be used to highlight terms
|
||||
* @param encoder an encoder that generates encoded text
|
||||
* @return the best fragment (snippet) string
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public final String getBestFragment( final XFieldQuery fieldQuery, IndexReader reader, int docId,
|
||||
String fieldName, int fragCharSize,
|
||||
XFragListBuilder fragListBuilder, XFragmentsBuilder fragmentsBuilder,
|
||||
String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
|
||||
XFieldFragList fieldFragList = getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
|
||||
return fragmentsBuilder.createFragment( reader, docId, fieldName, fieldFragList, preTags, postTags, encoder );
|
||||
}
|
||||
|
||||
/**
|
||||
* return the best fragments.
|
||||
*
|
||||
* @param fieldQuery {@link XFieldQuery} object
|
||||
* @param reader {@link IndexReader} of the index
|
||||
* @param docId document id to be highlighted
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fragCharSize the length (number of chars) of a fragment
|
||||
* @param maxNumFragments maximum number of fragments
|
||||
* @param fragListBuilder {@link XFragListBuilder} object
|
||||
* @param fragmentsBuilder {@link XFragmentsBuilder} object
|
||||
* @param preTags pre-tags to be used to highlight terms
|
||||
* @param postTags post-tags to be used to highlight terms
|
||||
* @param encoder an encoder that generates encoded text
|
||||
* @return created fragments or null when no fragments created.
|
||||
* size of the array can be less than maxNumFragments
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public final String[] getBestFragments( final XFieldQuery fieldQuery, IndexReader reader, int docId,
|
||||
String fieldName, int fragCharSize, int maxNumFragments,
|
||||
XFragListBuilder fragListBuilder, XFragmentsBuilder fragmentsBuilder,
|
||||
String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
|
||||
XFieldFragList fieldFragList =
|
||||
getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
|
||||
return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments,
|
||||
preTags, postTags, encoder );
|
||||
}
|
||||
|
||||
private XFieldFragList getFieldFragList( XFragListBuilder fragListBuilder,
|
||||
final XFieldQuery fieldQuery, IndexReader reader, int docId,
|
||||
String fieldName, int fragCharSize ) throws IOException {
|
||||
XFieldTermStack fieldTermStack = new XFieldTermStack( reader, docId, fieldName, fieldQuery );
|
||||
XFieldPhraseList fieldPhraseList = new XFieldPhraseList( fieldTermStack, fieldQuery, phraseLimit );
|
||||
return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize );
|
||||
}
|
||||
|
||||
/**
|
||||
* return whether phraseHighlight or not.
|
||||
*
|
||||
* @return whether phraseHighlight or not
|
||||
*/
|
||||
public boolean isPhraseHighlight(){ return phraseHighlight; }
|
||||
|
||||
/**
|
||||
* return whether fieldMatch or not.
|
||||
*
|
||||
* @return whether fieldMatch or not
|
||||
*/
|
||||
public boolean isFieldMatch(){ return fieldMatch; }
|
||||
|
||||
/**
|
||||
* @return the maximum number of phrases to analyze when searching for the highest-scoring phrase.
|
||||
*/
|
||||
public int getPhraseLimit () { return phraseLimit; }
|
||||
|
||||
/**
|
||||
* set the maximum number of phrases to analyze when searching for the highest-scoring phrase.
|
||||
* The default is unlimited (Integer.MAX_VALUE).
|
||||
*/
|
||||
public void setPhraseLimit (int phraseLimit) { this.phraseLimit = phraseLimit; }
|
||||
}
|
|
@ -1,142 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldPhraseList.WeightedPhraseInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldPhraseList.WeightedPhraseInfo.Toffs;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* FieldFragList has a list of "frag info" that is used by FragmentsBuilder class
|
||||
* to create fragments (snippets).
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public abstract class XFieldFragList {
|
||||
|
||||
private List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*
|
||||
* @param fragCharSize the length (number of chars) of a fragment
|
||||
*/
|
||||
public XFieldFragList( int fragCharSize ){
|
||||
}
|
||||
|
||||
/**
|
||||
* convert the list of WeightedPhraseInfo to WeightedFragInfo, then add it to the fragInfos
|
||||
*
|
||||
* @param startOffset start offset of the fragment
|
||||
* @param endOffset end offset of the fragment
|
||||
* @param phraseInfoList list of WeightedPhraseInfo objects
|
||||
*/
|
||||
public abstract void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList );
|
||||
|
||||
/**
|
||||
* return the list of WeightedFragInfos.
|
||||
*
|
||||
* @return fragInfos.
|
||||
*/
|
||||
public List<WeightedFragInfo> getFragInfos() {
|
||||
return fragInfos;
|
||||
}
|
||||
|
||||
/**
|
||||
* List of term offsets + weight for a frag info
|
||||
*/
|
||||
public static class WeightedFragInfo {
|
||||
|
||||
private List<SubInfo> subInfos;
|
||||
private float totalBoost;
|
||||
private int startOffset;
|
||||
private int endOffset;
|
||||
|
||||
public WeightedFragInfo( int startOffset, int endOffset, List<SubInfo> subInfos, float totalBoost ){
|
||||
this.startOffset = startOffset;
|
||||
this.endOffset = endOffset;
|
||||
this.totalBoost = totalBoost;
|
||||
this.subInfos = subInfos;
|
||||
}
|
||||
|
||||
public List<SubInfo> getSubInfos(){
|
||||
return subInfos;
|
||||
}
|
||||
|
||||
public float getTotalBoost(){
|
||||
return totalBoost;
|
||||
}
|
||||
|
||||
public int getStartOffset(){
|
||||
return startOffset;
|
||||
}
|
||||
|
||||
public int getEndOffset(){
|
||||
return endOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append( "subInfos=(" );
|
||||
for( SubInfo si : subInfos )
|
||||
sb.append( si.toString() );
|
||||
sb.append( ")/" ).append( totalBoost ).append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents the list of term offsets for some text
|
||||
*/
|
||||
public static class SubInfo {
|
||||
private final String text; // unnecessary member, just exists for debugging purpose
|
||||
private final List<Toffs> termsOffsets; // usually termsOffsets.size() == 1,
|
||||
// but if position-gap > 1 and slop > 0 then size() could be greater than 1
|
||||
private int seqnum;
|
||||
|
||||
public SubInfo( String text, List<Toffs> termsOffsets, int seqnum ){
|
||||
this.text = text;
|
||||
this.termsOffsets = termsOffsets;
|
||||
this.seqnum = seqnum;
|
||||
}
|
||||
|
||||
public List<Toffs> getTermsOffsets(){
|
||||
return termsOffsets;
|
||||
}
|
||||
|
||||
public int getSeqnum(){
|
||||
return seqnum;
|
||||
}
|
||||
|
||||
public String getText(){
|
||||
return text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append( text ).append( '(' );
|
||||
for( Toffs to : termsOffsets )
|
||||
sb.append( to.toString() );
|
||||
sb.append( ')' );
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,264 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldQuery.QueryPhraseMap;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldTermStack.TermInfo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
|
||||
* to create a FieldFragList object.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XFieldPhraseList {
|
||||
|
||||
LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
|
||||
|
||||
/**
|
||||
* create a FieldPhraseList that has no limit on the number of phrases to analyze
|
||||
*
|
||||
* @param fieldTermStack FieldTermStack object
|
||||
* @param fieldQuery FieldQuery object
|
||||
*/
|
||||
public XFieldPhraseList( XFieldTermStack fieldTermStack, XFieldQuery fieldQuery){
|
||||
this (fieldTermStack, fieldQuery, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* return the list of WeightedPhraseInfo.
|
||||
*
|
||||
* @return phraseList.
|
||||
*/
|
||||
public List<WeightedPhraseInfo> getPhraseList() {
|
||||
return phraseList;
|
||||
}
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*
|
||||
* @param fieldTermStack FieldTermStack object
|
||||
* @param fieldQuery FieldQuery object
|
||||
* @param phraseLimit maximum size of phraseList
|
||||
*/
|
||||
public XFieldPhraseList( XFieldTermStack fieldTermStack, XFieldQuery fieldQuery, int phraseLimit ){
|
||||
final String field = fieldTermStack.getFieldName();
|
||||
|
||||
QueryPhraseMap qpm = fieldQuery.getRootMap(field);
|
||||
if (qpm != null) {
|
||||
LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
|
||||
extractPhrases(fieldTermStack.termList, qpm, phraseCandidate, 0);
|
||||
assert phraseCandidate.size() == 0;
|
||||
}
|
||||
}
|
||||
|
||||
void extractPhrases(LinkedList<TermInfo> terms, QueryPhraseMap currMap, LinkedList<TermInfo> phraseCandidate, int longest) {
|
||||
if (phraseCandidate.size() > 1 && phraseCandidate.getLast().getPosition() - phraseCandidate.getFirst().getPosition() > currMap.getMaxPhraseWindow()) {
|
||||
return;
|
||||
}
|
||||
if (terms.isEmpty()) {
|
||||
if (longest > 0) {
|
||||
addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
|
||||
}
|
||||
return;
|
||||
}
|
||||
ArrayList<TermInfo> samePositionTerms = new ArrayList<TermInfo>();
|
||||
do {
|
||||
samePositionTerms.add(terms.pop());
|
||||
} while (!terms.isEmpty() && terms.get(0).getPosition() == samePositionTerms.get(0).getPosition());
|
||||
|
||||
// try all next terms at the same position
|
||||
for (TermInfo nextTerm : samePositionTerms) {
|
||||
QueryPhraseMap nextMap = currMap.getTermMap(nextTerm.getText());
|
||||
if (nextMap != null) {
|
||||
phraseCandidate.add(nextTerm);
|
||||
int l = longest;
|
||||
if(nextMap.isValidTermOrPhrase( phraseCandidate ) ){
|
||||
l = phraseCandidate.size();
|
||||
}
|
||||
extractPhrases(terms, nextMap, phraseCandidate, l);
|
||||
phraseCandidate.removeLast();
|
||||
}
|
||||
}
|
||||
|
||||
// ignore the next term
|
||||
extractPhrases(terms, currMap, phraseCandidate, longest);
|
||||
|
||||
// add terms back
|
||||
for (TermInfo nextTerm : samePositionTerms) {
|
||||
terms.push(nextTerm);
|
||||
}
|
||||
}
|
||||
|
||||
public void addIfNoOverlap( WeightedPhraseInfo wpi ){
|
||||
for( WeightedPhraseInfo existWpi : getPhraseList() ){
|
||||
if( existWpi.isOffsetOverlap( wpi ) ) {
|
||||
// WeightedPhraseInfo.addIfNoOverlap() dumps the second part of, for example, hyphenated words (social-economics).
|
||||
// The result is that all informations in TermInfo are lost and not available for further operations.
|
||||
existWpi.getTermsInfos().addAll( wpi.getTermsInfos() );
|
||||
return;
|
||||
}
|
||||
}
|
||||
getPhraseList().add( wpi );
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents the list of term offsets and boost for some text
|
||||
*/
|
||||
public static class WeightedPhraseInfo {
|
||||
|
||||
private String text; // unnecessary member, just exists for debugging purpose
|
||||
private List<Toffs> termsOffsets; // usually termsOffsets.size() == 1,
|
||||
// but if position-gap > 1 and slop > 0 then size() could be greater than 1
|
||||
private float boost; // query boost
|
||||
private int seqnum;
|
||||
|
||||
private ArrayList<TermInfo> termsInfos;
|
||||
|
||||
/**
|
||||
* @return the text
|
||||
*/
|
||||
public String getText() {
|
||||
return text;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the termsOffsets
|
||||
*/
|
||||
public List<Toffs> getTermsOffsets() {
|
||||
return termsOffsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the boost
|
||||
*/
|
||||
public float getBoost() {
|
||||
return boost;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the termInfos
|
||||
*/
|
||||
public List<TermInfo> getTermsInfos() {
|
||||
return termsInfos;
|
||||
}
|
||||
|
||||
public WeightedPhraseInfo( List<TermInfo> terms, float boost ){
|
||||
this( terms, boost, 0 );
|
||||
}
|
||||
|
||||
public WeightedPhraseInfo( List<TermInfo> terms, float boost, int seqnum ){
|
||||
this.boost = boost;
|
||||
this.seqnum = seqnum;
|
||||
|
||||
// We keep TermInfos for further operations
|
||||
termsInfos = new ArrayList<TermInfo>( terms );
|
||||
|
||||
termsOffsets = new ArrayList<Toffs>( terms.size() );
|
||||
TermInfo ti = terms.get( 0 );
|
||||
termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
|
||||
if( terms.size() == 1 ){
|
||||
text = ti.getText();
|
||||
return;
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append( ti.getText() );
|
||||
int pos = ti.getPosition();
|
||||
for( int i = 1; i < terms.size(); i++ ){
|
||||
ti = terms.get( i );
|
||||
sb.append( ti.getText() );
|
||||
if( ti.getPosition() - pos == 1 ){
|
||||
Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
|
||||
to.setEndOffset( ti.getEndOffset() );
|
||||
}
|
||||
else{
|
||||
termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
|
||||
}
|
||||
pos = ti.getPosition();
|
||||
}
|
||||
text = sb.toString();
|
||||
}
|
||||
|
||||
public int getStartOffset(){
|
||||
return termsOffsets.get( 0 ).startOffset;
|
||||
}
|
||||
|
||||
public int getEndOffset(){
|
||||
return termsOffsets.get( termsOffsets.size() - 1 ).endOffset;
|
||||
}
|
||||
|
||||
public boolean isOffsetOverlap( WeightedPhraseInfo other ){
|
||||
int so = getStartOffset();
|
||||
int eo = getEndOffset();
|
||||
int oso = other.getStartOffset();
|
||||
int oeo = other.getEndOffset();
|
||||
if( so <= oso && oso < eo ) return true;
|
||||
if( so < oeo && oeo <= eo ) return true;
|
||||
if( oso <= so && so < oeo ) return true;
|
||||
if( oso < eo && eo <= oeo ) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append( text ).append( '(' ).append( boost ).append( ")(" );
|
||||
for( Toffs to : termsOffsets ){
|
||||
sb.append( to );
|
||||
}
|
||||
sb.append( ')' );
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the seqnum
|
||||
*/
|
||||
public int getSeqnum() {
|
||||
return seqnum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Term offsets (start + end)
|
||||
*/
|
||||
public static class Toffs {
|
||||
private int startOffset;
|
||||
private int endOffset;
|
||||
public Toffs( int startOffset, int endOffset ){
|
||||
this.startOffset = startOffset;
|
||||
this.endOffset = endOffset;
|
||||
}
|
||||
public void setEndOffset( int endOffset ){
|
||||
this.endOffset = endOffset;
|
||||
}
|
||||
public int getStartOffset(){
|
||||
return startOffset;
|
||||
}
|
||||
public int getEndOffset(){
|
||||
return endOffset;
|
||||
}
|
||||
@Override
|
||||
public String toString(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,519 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldTermStack.TermInfo;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* FieldQuery breaks down query object into terms/phrases and keeps
|
||||
* them in a QueryPhraseMap structure.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XFieldQuery {
|
||||
|
||||
final boolean fieldMatch;
|
||||
|
||||
// fieldMatch==true, Map<fieldName,QueryPhraseMap>
|
||||
// fieldMatch==false, Map<null,QueryPhraseMap>
|
||||
Map<String, QueryPhraseMap> rootMaps = new HashMap<String, QueryPhraseMap>();
|
||||
|
||||
// fieldMatch==true, Map<fieldName,setOfTermsInQueries>
|
||||
// fieldMatch==false, Map<null,setOfTermsInQueries>
|
||||
Map<String, Set<String>> termSetMap = new HashMap<String, Set<String>>();
|
||||
|
||||
int termOrPhraseNumber; // used for colored tag support
|
||||
|
||||
private int maxPhraseWindow = 1;
|
||||
|
||||
// The maximum number of different matching terms accumulated from any one MultiTermQuery
|
||||
private static final int MAX_MTQ_TERMS = 1024;
|
||||
|
||||
XFieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
|
||||
this.fieldMatch = fieldMatch;
|
||||
Set<Query> flatQueries = new LinkedHashSet<Query>();
|
||||
flatten( query, reader, flatQueries );
|
||||
saveTerms( flatQueries, reader );
|
||||
Collection<Query> expandQueries = expand( flatQueries );
|
||||
|
||||
for( Query flatQuery : expandQueries ){
|
||||
QueryPhraseMap rootMap = getRootMap( flatQuery );
|
||||
rootMap.add( flatQuery, reader );
|
||||
if( !phraseHighlight && flatQuery instanceof PhraseQuery ){
|
||||
PhraseQuery pq = (PhraseQuery)flatQuery;
|
||||
if( pq.getTerms().length > 1 ){
|
||||
for( Term term : pq.getTerms() )
|
||||
rootMap.addTerm( term, flatQuery.getBoost() );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * For backwards compatibility a FieldQuery can be initialized without an IndexReader;
 * the reader is only required to support MultiTermQuery. Delegates to the four-argument
 * constructor with a {@code null} reader.
 *
 * @param query the query to break down
 * @param phraseHighlight passed through unchanged
 * @param fieldMatch passed through unchanged
 * @throws IOException propagated from the four-argument constructor
 */
XFieldQuery( Query query, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
  this( query, null, phraseHighlight, fieldMatch );
}
|
||||
|
||||
void flatten( Query sourceQuery, IndexReader reader, Collection<Query> flatQueries ) throws IOException{
|
||||
if( sourceQuery instanceof BooleanQuery ){
|
||||
BooleanQuery bq = (BooleanQuery)sourceQuery;
|
||||
for( BooleanClause clause : bq.getClauses() ){
|
||||
if( !clause.isProhibited() )
|
||||
flatten( clause.getQuery(), reader, flatQueries );
|
||||
}
|
||||
} else if( sourceQuery instanceof DisjunctionMaxQuery ){
|
||||
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
|
||||
for( Query query : dmq ){
|
||||
flatten( query, reader, flatQueries );
|
||||
}
|
||||
}
|
||||
else if( sourceQuery instanceof TermQuery ){
|
||||
if( !flatQueries.contains( sourceQuery ) )
|
||||
flatQueries.add( sourceQuery );
|
||||
}
|
||||
else if( sourceQuery instanceof PhraseQuery ){
|
||||
if( !flatQueries.contains( sourceQuery ) ){
|
||||
PhraseQuery pq = (PhraseQuery)sourceQuery;
|
||||
if( pq.getTerms().length > 1 )
|
||||
flatQueries.add( pq );
|
||||
else if( pq.getTerms().length == 1 ){
|
||||
flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
|
||||
}
|
||||
}
|
||||
} else if (sourceQuery instanceof ConstantScoreQuery) {
|
||||
final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
|
||||
if (q != null) {
|
||||
flatten(q, reader, flatQueries);
|
||||
}
|
||||
} else if (sourceQuery instanceof FilteredQuery) {
|
||||
final Query q = ((FilteredQuery) sourceQuery).getQuery();
|
||||
if (q != null) {
|
||||
flatten(q, reader, flatQueries);
|
||||
}
|
||||
} else if (reader != null){
|
||||
Query query = sourceQuery;
|
||||
if (sourceQuery instanceof MultiTermQuery) {
|
||||
MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
|
||||
copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
|
||||
query = copy;
|
||||
}
|
||||
Query rewritten = query.rewrite(reader);
|
||||
if (rewritten != query) {
|
||||
// only rewrite once and then flatten again - the rewritten query could have a speacial treatment
|
||||
// if this method is overwritten in a subclass.
|
||||
flatten(rewritten, reader, flatQueries);
|
||||
|
||||
}
|
||||
// if the query is already rewritten we discard it
|
||||
}
|
||||
// else discard queries
|
||||
}
|
||||
|
||||
/*
|
||||
* Create expandQueries from flatQueries.
|
||||
*
|
||||
* expandQueries := flatQueries + overlapped phrase queries
|
||||
*
|
||||
* ex1) flatQueries={a,b,c}
|
||||
* => expandQueries={a,b,c}
|
||||
* ex2) flatQueries={a,"b c","c d"}
|
||||
* => expandQueries={a,"b c","c d","b c d"}
|
||||
*/
|
||||
Collection<Query> expand( Collection<Query> flatQueries ){
|
||||
Set<Query> expandQueries = new LinkedHashSet<Query>();
|
||||
for( Iterator<Query> i = flatQueries.iterator(); i.hasNext(); ){
|
||||
Query query = i.next();
|
||||
i.remove();
|
||||
expandQueries.add( query );
|
||||
if( !( query instanceof PhraseQuery ) ) continue;
|
||||
for( Iterator<Query> j = flatQueries.iterator(); j.hasNext(); ){
|
||||
Query qj = j.next();
|
||||
if( !( qj instanceof PhraseQuery ) ) continue;
|
||||
checkOverlap( expandQueries, (PhraseQuery)query, (PhraseQuery)qj );
|
||||
}
|
||||
}
|
||||
return expandQueries;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if PhraseQuery A and B have overlapped part.
|
||||
*
|
||||
* ex1) A="a b", B="b c" => overlap; expandQueries={"a b c"}
|
||||
* ex2) A="b c", B="a b" => overlap; expandQueries={"a b c"}
|
||||
* ex3) A="a b", B="c d" => no overlap; expandQueries={}
|
||||
*/
|
||||
private void checkOverlap( Collection<Query> expandQueries, PhraseQuery a, PhraseQuery b ){
|
||||
if( a.getSlop() != b.getSlop() ) return;
|
||||
Term[] ats = a.getTerms();
|
||||
Term[] bts = b.getTerms();
|
||||
if( fieldMatch && !ats[0].field().equals( bts[0].field() ) ) return;
|
||||
checkOverlap( expandQueries, ats, bts, a.getSlop(), a.getBoost() );
|
||||
checkOverlap( expandQueries, bts, ats, b.getSlop(), b.getBoost() );
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if src and dest have overlapped part and if it is, create PhraseQueries and add expandQueries.
|
||||
*
|
||||
* ex1) src="a b", dest="c d" => no overlap
|
||||
* ex2) src="a b", dest="a b c" => no overlap
|
||||
* ex3) src="a b", dest="b c" => overlap; expandQueries={"a b c"}
|
||||
* ex4) src="a b c", dest="b c d" => overlap; expandQueries={"a b c d"}
|
||||
* ex5) src="a b c", dest="b c" => no overlap
|
||||
* ex6) src="a b c", dest="b" => no overlap
|
||||
* ex7) src="a a a a", dest="a a a" => overlap;
|
||||
* expandQueries={"a a a a a","a a a a a a"}
|
||||
* ex8) src="a b c d", dest="b c" => no overlap
|
||||
*/
|
||||
private void checkOverlap( Collection<Query> expandQueries, Term[] src, Term[] dest, int slop, float boost ){
|
||||
// beginning from 1 (not 0) is safe because that the PhraseQuery has multiple terms
|
||||
// is guaranteed in flatten() method (if PhraseQuery has only one term, flatten()
|
||||
// converts PhraseQuery to TermQuery)
|
||||
for( int i = 1; i < src.length; i++ ){
|
||||
boolean overlap = true;
|
||||
for( int j = i; j < src.length; j++ ){
|
||||
if( ( j - i ) < dest.length && !src[j].text().equals( dest[j-i].text() ) ){
|
||||
overlap = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( overlap && src.length - i < dest.length ){
|
||||
PhraseQuery pq = new PhraseQuery();
|
||||
for( Term srcTerm : src )
|
||||
pq.add( srcTerm );
|
||||
for( int k = src.length - i; k < dest.length; k++ ){
|
||||
pq.add( new Term( src[0].field(), dest[k].text() ) );
|
||||
}
|
||||
pq.setSlop( slop );
|
||||
pq.setBoost( boost );
|
||||
if(!expandQueries.contains( pq ) )
|
||||
expandQueries.add( pq );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
QueryPhraseMap getRootMap( Query query ){
|
||||
String key = getKey( query );
|
||||
QueryPhraseMap map = rootMaps.get( key );
|
||||
if( map == null ){
|
||||
map = new QueryPhraseMap( this );
|
||||
rootMaps.put( key, map );
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 'key' string. 'key' is the field name of the Query.
|
||||
* If not fieldMatch, 'key' will be null.
|
||||
*/
|
||||
private String getKey( Query query ){
|
||||
if( !fieldMatch ) return null;
|
||||
if( query instanceof TermQuery )
|
||||
return ((TermQuery)query).getTerm().field();
|
||||
else if ( query instanceof PhraseQuery ){
|
||||
PhraseQuery pq = (PhraseQuery)query;
|
||||
Term[] terms = pq.getTerms();
|
||||
return terms[0].field();
|
||||
}
|
||||
else if (query instanceof MultiTermQuery) {
|
||||
return ((MultiTermQuery)query).getField();
|
||||
}
|
||||
else
|
||||
throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
|
||||
}
|
||||
|
||||
/*
|
||||
* Save the set of terms in the queries to termSetMap.
|
||||
*
|
||||
* ex1) q=name:john
|
||||
* - fieldMatch==true
|
||||
* termSetMap=Map<"name",Set<"john">>
|
||||
* - fieldMatch==false
|
||||
* termSetMap=Map<null,Set<"john">>
|
||||
*
|
||||
* ex2) q=name:john title:manager
|
||||
* - fieldMatch==true
|
||||
* termSetMap=Map<"name",Set<"john">,
|
||||
* "title",Set<"manager">>
|
||||
* - fieldMatch==false
|
||||
* termSetMap=Map<null,Set<"john","manager">>
|
||||
*
|
||||
* ex3) q=name:"john lennon"
|
||||
* - fieldMatch==true
|
||||
* termSetMap=Map<"name",Set<"john","lennon">>
|
||||
* - fieldMatch==false
|
||||
* termSetMap=Map<null,Set<"john","lennon">>
|
||||
*/
|
||||
void saveTerms( Collection<Query> flatQueries, IndexReader reader ) throws IOException{
|
||||
for( Query query : flatQueries ){
|
||||
Set<String> termSet = getTermSet( query );
|
||||
if( query instanceof TermQuery )
|
||||
termSet.add( ((TermQuery)query).getTerm().text() );
|
||||
else if( query instanceof PhraseQuery ){
|
||||
for( Term term : ((PhraseQuery)query).getTerms() )
|
||||
termSet.add( term.text() );
|
||||
}
|
||||
else if (query instanceof MultiTermQuery && reader != null) {
|
||||
BooleanQuery mtqTerms = (BooleanQuery) query.rewrite(reader);
|
||||
for (BooleanClause clause : mtqTerms.getClauses()) {
|
||||
termSet.add (((TermQuery) clause.getQuery()).getTerm().text());
|
||||
}
|
||||
}
|
||||
else
|
||||
throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
|
||||
}
|
||||
}
|
||||
|
||||
private Set<String> getTermSet( Query query ){
|
||||
String key = getKey( query );
|
||||
Set<String> set = termSetMap.get( key );
|
||||
if( set == null ){
|
||||
set = new HashSet<String>();
|
||||
termSetMap.put( key, set );
|
||||
}
|
||||
return set;
|
||||
}
|
||||
|
||||
Set<String> getTermSet( String field ){
|
||||
return termSetMap.get( fieldMatch ? field : null );
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return QueryPhraseMap
|
||||
*/
|
||||
public QueryPhraseMap getFieldTermMap( String fieldName, String term ){
|
||||
QueryPhraseMap rootMap = getRootMap( fieldName );
|
||||
return rootMap == null ? null : rootMap.subMap.get( term );
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return QueryPhraseMap
|
||||
*/
|
||||
public QueryPhraseMap searchPhrase( String fieldName, final List<TermInfo> phraseCandidate ){
|
||||
QueryPhraseMap root = getRootMap( fieldName );
|
||||
if( root == null ) return null;
|
||||
return root.searchPhrase( phraseCandidate );
|
||||
}
|
||||
|
||||
public QueryPhraseMap getRootMap( String fieldName ){
|
||||
return rootMaps.get( fieldMatch ? fieldName : null );
|
||||
}
|
||||
|
||||
int nextTermOrPhraseNumber(){
|
||||
return termOrPhraseNumber++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal structure of a query for highlighting: represents
|
||||
* a nested query structure
|
||||
*/
|
||||
public static class QueryPhraseMap {
|
||||
|
||||
boolean terminal;
|
||||
int slop; // valid if terminal == true and phraseHighlight == true
|
||||
float boost; // valid if terminal == true
|
||||
int[] positions; // valid if terminal == true
|
||||
int termOrPhraseNumber; // valid if terminal == true
|
||||
XFieldQuery fieldQuery;
|
||||
Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
|
||||
|
||||
public QueryPhraseMap( XFieldQuery fieldQuery ){
|
||||
this.fieldQuery = fieldQuery;
|
||||
}
|
||||
|
||||
void addTerm( Term term, float boost ){
|
||||
QueryPhraseMap map = getOrNewMap( subMap, term.text() );
|
||||
map.markTerminal( boost );
|
||||
}
|
||||
|
||||
private QueryPhraseMap getOrNewMap( Map<String, QueryPhraseMap> subMap, String term ){
|
||||
QueryPhraseMap map = subMap.get( term );
|
||||
if( map == null ){
|
||||
map = new QueryPhraseMap( fieldQuery );
|
||||
subMap.put( term, map );
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
void add( Query query, IndexReader reader ) {
|
||||
if( query instanceof TermQuery ){
|
||||
addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
|
||||
}
|
||||
else if( query instanceof PhraseQuery ){
|
||||
PhraseQuery pq = (PhraseQuery)query;
|
||||
final Term[] terms = pq.getTerms();
|
||||
final int[] positions = pq.getPositions();
|
||||
new InPlaceMergeSorter() {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
Term tmpTerm = terms[i];
|
||||
terms[i] = terms[j];
|
||||
terms[j] = tmpTerm;
|
||||
|
||||
int tmpPos = positions[i];
|
||||
positions[i] = positions[j];
|
||||
positions[j] = tmpPos;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return positions[i] - positions[j];
|
||||
}
|
||||
}.sort(0, terms.length);
|
||||
|
||||
addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
|
||||
}
|
||||
else
|
||||
throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
|
||||
}
|
||||
|
||||
private int numTermsAtSamePosition(int[] positions, int i) {
|
||||
int numTermsAtSamePosition = 1;
|
||||
for (int j = i + 1; j < positions.length; ++j) {
|
||||
if (positions[j] == positions[i]) {
|
||||
++numTermsAtSamePosition;
|
||||
}
|
||||
}
|
||||
return numTermsAtSamePosition;
|
||||
}
|
||||
|
||||
private void addToMap(PhraseQuery pq, Term[] terms, int[] positions, int i, Map<String, QueryPhraseMap> map, int slop) {
|
||||
int numTermsAtSamePosition = numTermsAtSamePosition(positions, i);
|
||||
for (int j = 0; j < numTermsAtSamePosition; ++j) {
|
||||
QueryPhraseMap qpm = getOrNewMap(map, terms[i + j].text());
|
||||
if (i + numTermsAtSamePosition == terms.length) {
|
||||
qpm.markTerminal(pq.getSlop(), pq.getBoost(), uniquePositions(positions));
|
||||
} else {
|
||||
addToMap(pq, terms, positions, i + numTermsAtSamePosition, qpm.subMap, slop);
|
||||
}
|
||||
}
|
||||
if (slop > 2 && i + numTermsAtSamePosition < terms.length) {
|
||||
Term[] otherTerms = Arrays.copyOf(terms, terms.length);
|
||||
int[] otherPositions = Arrays.copyOf(positions, positions.length);
|
||||
final int nextTermAtSamePosition = numTermsAtSamePosition(positions, i + numTermsAtSamePosition);
|
||||
System.arraycopy(terms, i + numTermsAtSamePosition, otherTerms, i, nextTermAtSamePosition);
|
||||
System.arraycopy(positions, i + numTermsAtSamePosition, otherPositions, i, nextTermAtSamePosition);
|
||||
System.arraycopy(terms, i, otherTerms, i + nextTermAtSamePosition, numTermsAtSamePosition);
|
||||
System.arraycopy(positions, i, otherPositions, i + nextTermAtSamePosition, numTermsAtSamePosition);
|
||||
addToMap(pq, otherTerms, otherPositions, i, map, slop - 2);
|
||||
}
|
||||
}
|
||||
|
||||
private int[] uniquePositions(int[] positions) {
|
||||
int uniqueCount = 1;
|
||||
for (int i = 1; i < positions.length; ++i) {
|
||||
if (positions[i] != positions[i - 1]) {
|
||||
++uniqueCount;
|
||||
}
|
||||
}
|
||||
if (uniqueCount == positions.length) {
|
||||
return positions;
|
||||
}
|
||||
int[] result = new int[uniqueCount];
|
||||
result[0] = positions[0];
|
||||
for (int i = 1, j = 1; i < positions.length; ++i) {
|
||||
if (positions[i] != positions[i - 1]) {
|
||||
result[j++] = positions[i];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public QueryPhraseMap getTermMap( String term ){
|
||||
return subMap.get( term );
|
||||
}
|
||||
|
||||
private void markTerminal( float boost ){
|
||||
markTerminal( 0, boost, null );
|
||||
}
|
||||
|
||||
private void markTerminal( int slop, float boost, int[] positions ){
|
||||
if (slop > this.slop || (slop == this.slop && boost > this.boost)) {
|
||||
this.terminal = true;
|
||||
this.slop = slop;
|
||||
this.boost = boost;
|
||||
this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
|
||||
this.positions = positions;
|
||||
if (positions != null) {
|
||||
fieldQuery.maxPhraseWindow = Math.max(fieldQuery.maxPhraseWindow, slop + positions[positions.length-1] - positions[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int getMaxPhraseWindow() {
|
||||
return fieldQuery.maxPhraseWindow;
|
||||
}
|
||||
|
||||
public boolean isTerminal(){
|
||||
return terminal;
|
||||
}
|
||||
|
||||
public int getSlop(){
|
||||
return slop;
|
||||
}
|
||||
|
||||
public float getBoost(){
|
||||
return boost;
|
||||
}
|
||||
|
||||
public int getTermOrPhraseNumber(){
|
||||
return termOrPhraseNumber;
|
||||
}
|
||||
|
||||
public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
|
||||
QueryPhraseMap currMap = this;
|
||||
for( TermInfo ti : phraseCandidate ){
|
||||
currMap = currMap.subMap.get( ti.getText() );
|
||||
if( currMap == null ) return null;
|
||||
}
|
||||
return currMap.isValidTermOrPhrase( phraseCandidate ) ? currMap : null;
|
||||
}
|
||||
|
||||
public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
|
||||
// check terminal
|
||||
if( !terminal ) return false;
|
||||
|
||||
// if the candidate is a term, it is valid
|
||||
if( phraseCandidate.size() == 1 ) return true;
|
||||
|
||||
|
||||
assert phraseCandidate.size() == positions.length;
|
||||
// else check whether the candidate is valid phrase
|
||||
// compare position-gaps between terms to slop
|
||||
int pos = phraseCandidate.get( 0 ).getPosition();
|
||||
int totalDistance = 0;
|
||||
for( int i = 1; i < phraseCandidate.size(); i++ ){
|
||||
int nextPos = phraseCandidate.get( i ).getPosition();
|
||||
final int expectedDelta = positions[i] - positions[i - 1];
|
||||
final int actualDelta = nextPos - pos;
|
||||
totalDistance += Math.abs(expectedDelta - actualDelta);
|
||||
pos = nextPos;
|
||||
}
|
||||
return totalDistance <= slop;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,212 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
|
||||
* of the document to be highlighted.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XFieldTermStack {
|
||||
|
||||
private final String fieldName;
|
||||
LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
|
||||
|
||||
//public static void main( String[] args ) throws Exception {
|
||||
// Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
|
||||
// QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
|
||||
// Query query = parser.parse( "a x:b" );
|
||||
// FieldQuery fieldQuery = new FieldQuery( query, true, false );
|
||||
|
||||
// Directory dir = new RAMDirectory();
|
||||
// IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
|
||||
// Document doc = new Document();
|
||||
// FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||
// ft.setStoreTermVectors(true);
|
||||
// ft.setStoreTermVectorOffsets(true);
|
||||
// ft.setStoreTermVectorPositions(true);
|
||||
// doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
|
||||
// doc.add( new Field( "f", ft, "b a b a f" ) );
|
||||
// writer.addDocument( doc );
|
||||
// writer.close();
|
||||
|
||||
// IndexReader reader = IndexReader.open(dir1);
|
||||
// new FieldTermStack( reader, 0, "f", fieldQuery );
|
||||
// reader.close();
|
||||
//}
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*
|
||||
* @param reader IndexReader of the index
|
||||
* @param docId document id to be highlighted
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fieldQuery FieldQuery object
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public XFieldTermStack( IndexReader reader, int docId, String fieldName, final XFieldQuery fieldQuery ) throws IOException {
|
||||
this.fieldName = fieldName;
|
||||
|
||||
Set<String> termSet = fieldQuery.getTermSet( fieldName );
|
||||
// just return to make null snippet if un-matched fieldName specified when fieldMatch == true
|
||||
if( termSet == null ) return;
|
||||
|
||||
final Fields vectors = reader.getTermVectors(docId);
|
||||
if (vectors == null) {
|
||||
// null snippet
|
||||
return;
|
||||
}
|
||||
|
||||
final Terms vector = vectors.terms(fieldName);
|
||||
if (vector == null) {
|
||||
// null snippet
|
||||
return;
|
||||
}
|
||||
|
||||
final CharsRef spare = new CharsRef();
|
||||
final TermsEnum termsEnum = vector.iterator(null);
|
||||
DocsAndPositionsEnum dpEnum = null;
|
||||
BytesRef text;
|
||||
|
||||
int numDocs = reader.maxDoc();
|
||||
|
||||
final List<TermInfo> termList = new ArrayList<TermInfo>();
|
||||
while ((text = termsEnum.next()) != null) {
|
||||
UnicodeUtil.UTF8toUTF16(text, spare);
|
||||
final String term = spare.toString();
|
||||
if (!termSet.contains(term)) {
|
||||
continue;
|
||||
}
|
||||
dpEnum = termsEnum.docsAndPositions(null, dpEnum);
|
||||
if (dpEnum == null) {
|
||||
// null snippet
|
||||
return;
|
||||
}
|
||||
|
||||
dpEnum.nextDoc();
|
||||
|
||||
// For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
|
||||
final float weight = ( float ) ( Math.log( numDocs / ( double ) ( reader.docFreq( new Term(fieldName, text) ) + 1 ) ) + 1.0 );
|
||||
|
||||
// ES EDIT: added a safety check to limit this to 512 terms everything above might be meaningless anyways
|
||||
// This limit protectes the FVH from running into StackOverflowErrors if super large TF docs are highlighted.
|
||||
final int freq = Math.min(512, dpEnum.freq());
|
||||
|
||||
|
||||
for(int i = 0;i < freq;i++) {
|
||||
int pos = dpEnum.nextPosition();
|
||||
if (dpEnum.startOffset() < 0) {
|
||||
return; // no offsets, null snippet
|
||||
}
|
||||
termList.add( new TermInfo( term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight ) );
|
||||
}
|
||||
}
|
||||
|
||||
// sort by position
|
||||
CollectionUtil.timSort(termList);
|
||||
this.termList.addAll(termList);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return field name
|
||||
*/
|
||||
public String getFieldName(){
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the top TermInfo object of the stack
|
||||
*/
|
||||
public TermInfo pop(){
|
||||
return termList.poll();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the top TermInfo object of the stack without removing it.
|
||||
*/
|
||||
public TermInfo peek() {
|
||||
return termList.peek();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param termInfo the TermInfo object to be put on the top of the stack
|
||||
*/
|
||||
public void push( TermInfo termInfo ){
|
||||
termList.push( termInfo );
|
||||
}
|
||||
|
||||
/**
|
||||
* to know whether the stack is empty
|
||||
*
|
||||
* @return true if the stack is empty, false if not
|
||||
*/
|
||||
public boolean isEmpty(){
|
||||
return termList == null || termList.size() == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Single term with its position/offsets in the document and IDF weight
|
||||
*/
|
||||
public static class TermInfo implements Comparable<TermInfo>{
|
||||
|
||||
private final String text;
|
||||
private final int startOffset;
|
||||
private final int endOffset;
|
||||
private final int position;
|
||||
|
||||
// IDF-weight of this term
|
||||
private final float weight;
|
||||
|
||||
public TermInfo( String text, int startOffset, int endOffset, int position, float weight ){
|
||||
this.text = text;
|
||||
this.startOffset = startOffset;
|
||||
this.endOffset = endOffset;
|
||||
this.position = position;
|
||||
this.weight = weight;
|
||||
}
|
||||
|
||||
public String getText(){ return text; }
|
||||
public int getStartOffset(){ return startOffset; }
|
||||
public int getEndOffset(){ return endOffset; }
|
||||
public int getPosition(){ return position; }
|
||||
public float getWeight(){ return weight; }
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append( text ).append( '(' ).append(startOffset).append( ',' ).append( endOffset ).append( ',' ).append( position ).append( ')' );
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo( TermInfo o ){
|
||||
return ( this.position - o.position );
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* FragListBuilder is an interface for FieldFragList builder classes.
|
||||
* A FragListBuilder class can be plugged in to Highlighter.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public interface XFragListBuilder {
|
||||
|
||||
/**
|
||||
* create a FieldFragList.
|
||||
*
|
||||
* @param fieldPhraseList FieldPhraseList object
|
||||
* @param fragCharSize the length (number of chars) of a fragment
|
||||
* @return the created FieldFragList object
|
||||
*/
|
||||
public XFieldFragList createFieldFragList( XFieldPhraseList fieldPhraseList, int fragCharSize );
|
||||
}
|
|
@ -1,96 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.highlight.Encoder;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* {@link org.apache.lucene.search.vectorhighlight.XFragmentsBuilder} is an interface for fragments (snippets) builder classes.
|
||||
* A {@link org.apache.lucene.search.vectorhighlight.XFragmentsBuilder} class can be plugged in to
|
||||
* {@link org.apache.lucene.search.vectorhighlight.XFastVectorHighlighter}.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public interface XFragmentsBuilder {
|
||||
|
||||
/**
|
||||
* create a fragment.
|
||||
*
|
||||
* @param reader IndexReader of the index
|
||||
* @param docId document id to be highlighted
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fieldFragList FieldFragList object
|
||||
* @return a created fragment or null when no fragment created
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public String createFragment( IndexReader reader, int docId, String fieldName,
|
||||
XFieldFragList fieldFragList ) throws IOException;
|
||||
|
||||
/**
|
||||
* create multiple fragments.
|
||||
*
|
||||
* @param reader IndexReader of the index
|
||||
* @param docId document id to be highlighter
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fieldFragList FieldFragList object
|
||||
* @param maxNumFragments maximum number of fragments
|
||||
* @return created fragments or null when no fragments created.
|
||||
* size of the array can be less than maxNumFragments
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public String[] createFragments( IndexReader reader, int docId, String fieldName,
|
||||
XFieldFragList fieldFragList, int maxNumFragments ) throws IOException;
|
||||
|
||||
/**
|
||||
* create a fragment.
|
||||
*
|
||||
* @param reader IndexReader of the index
|
||||
* @param docId document id to be highlighted
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fieldFragList FieldFragList object
|
||||
* @param preTags pre-tags to be used to highlight terms
|
||||
* @param postTags post-tags to be used to highlight terms
|
||||
* @param encoder an encoder that generates encoded text
|
||||
* @return a created fragment or null when no fragment created
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public String createFragment( IndexReader reader, int docId, String fieldName,
|
||||
XFieldFragList fieldFragList, String[] preTags, String[] postTags,
|
||||
Encoder encoder ) throws IOException;
|
||||
|
||||
/**
|
||||
* create multiple fragments.
|
||||
*
|
||||
* @param reader IndexReader of the index
|
||||
* @param docId document id to be highlighter
|
||||
* @param fieldName field of the document to be highlighted
|
||||
* @param fieldFragList FieldFragList object
|
||||
* @param maxNumFragments maximum number of fragments
|
||||
* @param preTags pre-tags to be used to highlight terms
|
||||
* @param postTags post-tags to be used to highlight terms
|
||||
* @param encoder an encoder that generates encoded text
|
||||
* @return created fragments or null when no fragments created.
|
||||
* size of the array can be less than maxNumFragments
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public String[] createFragments( IndexReader reader, int docId, String fieldName,
|
||||
XFieldFragList fieldFragList, int maxNumFragments, String[] preTags, String[] postTags,
|
||||
Encoder encoder ) throws IOException;
|
||||
}
|
|
@ -1,84 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* An implementation of FragmentsBuilder that outputs score-order fragments.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XScoreOrderFragmentsBuilder extends XBaseFragmentsBuilder {
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*/
|
||||
public XScoreOrderFragmentsBuilder(){
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*
|
||||
* @param preTags array of pre-tags for markup terms.
|
||||
* @param postTags array of post-tags for markup terms.
|
||||
*/
|
||||
public XScoreOrderFragmentsBuilder( String[] preTags, String[] postTags ){
|
||||
super( preTags, postTags );
|
||||
}
|
||||
|
||||
public XScoreOrderFragmentsBuilder( BoundaryScanner bs ){
|
||||
super( bs );
|
||||
}
|
||||
|
||||
public XScoreOrderFragmentsBuilder( String[] preTags, String[] postTags, BoundaryScanner bs ){
|
||||
super( preTags, postTags, bs );
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort by score the list of WeightedFragInfo
|
||||
*/
|
||||
@Override
|
||||
public List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src ) {
|
||||
CollectionUtil.timSort( src, new ScoreComparator() );
|
||||
return src;
|
||||
}
|
||||
|
||||
/**
|
||||
* Comparator for {@link WeightedFragInfo} by boost, breaking ties
|
||||
* by offset.
|
||||
*/
|
||||
public static class ScoreComparator implements Comparator<WeightedFragInfo> {
|
||||
|
||||
@Override
|
||||
public int compare( WeightedFragInfo o1, WeightedFragInfo o2 ) {
|
||||
if( o1.getTotalBoost() > o2.getTotalBoost() ) return -1;
|
||||
else if( o1.getTotalBoost() < o2.getTotalBoost() ) return 1;
|
||||
// if same score then check startOffset
|
||||
else{
|
||||
if( o1.getStartOffset() < o2.getStartOffset() ) return -1;
|
||||
else if( o1.getStartOffset() > o2.getStartOffset() ) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,55 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo.SubInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldPhraseList.WeightedPhraseInfo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A simple implementation of {@link XFieldFragList}.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XSimpleFieldFragList extends XFieldFragList {
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*
|
||||
* @param fragCharSize the length (number of chars) of a fragment
|
||||
*/
|
||||
public XSimpleFieldFragList( int fragCharSize ) {
|
||||
super( fragCharSize );
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.vectorhighlight.FieldFragList#add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList )
|
||||
*/
|
||||
@Override
|
||||
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
|
||||
float totalBoost = 0;
|
||||
List<SubInfo> subInfos = new ArrayList<SubInfo>();
|
||||
for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
|
||||
subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) );
|
||||
totalBoost += phraseInfo.getBoost();
|
||||
}
|
||||
getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) );
|
||||
}
|
||||
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A simple implementation of {@link XFragListBuilder}.
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XSimpleFragListBuilder extends XBaseFragListBuilder {
|
||||
|
||||
public XSimpleFragListBuilder() {
|
||||
super();
|
||||
}
|
||||
|
||||
public XSimpleFragListBuilder(int margin) {
|
||||
super(margin);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.vectorhighlight.FragListBuilder#createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
|
||||
*/
|
||||
@Override
|
||||
public XFieldFragList createFieldFragList( XFieldPhraseList fieldPhraseList, int fragCharSize ){
|
||||
return createFieldFragList( fieldPhraseList, new XSimpleFieldFragList( fragCharSize ), fragCharSize );
|
||||
}
|
||||
|
||||
}
|
|
@ -1,63 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A simple implementation of FragmentsBuilder.
|
||||
*
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XSimpleFragmentsBuilder extends XBaseFragmentsBuilder {
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*/
|
||||
public XSimpleFragmentsBuilder() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*
|
||||
* @param preTags array of pre-tags for markup terms.
|
||||
* @param postTags array of post-tags for markup terms.
|
||||
*/
|
||||
public XSimpleFragmentsBuilder( String[] preTags, String[] postTags ) {
|
||||
super( preTags, postTags );
|
||||
}
|
||||
|
||||
public XSimpleFragmentsBuilder( BoundaryScanner bs ) {
|
||||
super( bs );
|
||||
}
|
||||
|
||||
public XSimpleFragmentsBuilder( String[] preTags, String[] postTags, BoundaryScanner bs ) {
|
||||
super( preTags, postTags, bs );
|
||||
}
|
||||
|
||||
/**
|
||||
* do nothing. return the source list.
|
||||
*/
|
||||
@Override
|
||||
public List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src ) {
|
||||
return src;
|
||||
}
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldPhraseList.WeightedPhraseInfo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* An implementation class of {@link XFragListBuilder} that generates one {@link WeightedFragInfo} object.
|
||||
* Typical use case of this class is that you can get an entire field contents
|
||||
* by using both of this class and {@link XSimpleFragmentsBuilder}.<br/>
|
||||
* <pre class="prettyprint">
|
||||
* FastVectorHighlighter h = new FastVectorHighlighter( true, true,
|
||||
* new SingleFragListBuilder(), new SimpleFragmentsBuilder() );
|
||||
* </pre>
|
||||
*/
|
||||
//LUCENE MONITOR - REMOVE ME WHEN LUCENE 4.5 IS OUT
|
||||
public class XSingleFragListBuilder implements XFragListBuilder {
|
||||
|
||||
@Override
|
||||
public XFieldFragList createFieldFragList(XFieldPhraseList fieldPhraseList,
|
||||
int fragCharSize) {
|
||||
|
||||
XFieldFragList ffl = new XSimpleFieldFragList( fragCharSize );
|
||||
|
||||
List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
|
||||
Iterator<WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator();
|
||||
WeightedPhraseInfo phraseInfo = null;
|
||||
while( true ){
|
||||
if( !ite.hasNext() ) break;
|
||||
phraseInfo = ite.next();
|
||||
if( phraseInfo == null ) break;
|
||||
|
||||
wpil.add( phraseInfo );
|
||||
}
|
||||
if( wpil.size() > 0 )
|
||||
ffl.add( 0, Integer.MAX_VALUE, wpil );
|
||||
return ffl;
|
||||
}
|
||||
|
||||
}
|
|
@ -77,10 +77,10 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
|
||||
try {
|
||||
MapperHighlightEntry entry = cache.mappers.get(mapper);
|
||||
XFieldQuery fieldQuery = null;
|
||||
FieldQuery fieldQuery = null;
|
||||
if (entry == null) {
|
||||
XFragListBuilder fragListBuilder;
|
||||
XBaseFragmentsBuilder fragmentsBuilder;
|
||||
FragListBuilder fragListBuilder;
|
||||
BaseFragmentsBuilder fragmentsBuilder;
|
||||
|
||||
BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER;
|
||||
if (field.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
|
||||
|
@ -88,7 +88,7 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
}
|
||||
|
||||
if (field.numberOfFragments() == 0) {
|
||||
fragListBuilder = new XSingleFragListBuilder();
|
||||
fragListBuilder = new SingleFragListBuilder();
|
||||
|
||||
if (mapper.fieldType().stored()) {
|
||||
fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.preTags(), field.postTags(), boundaryScanner);
|
||||
|
@ -96,10 +96,10 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);
|
||||
}
|
||||
} else {
|
||||
fragListBuilder = field.fragmentOffset() == -1 ? new XSimpleFragListBuilder() : new XSimpleFragListBuilder(field.fragmentOffset());
|
||||
fragListBuilder = field.fragmentOffset() == -1 ? new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fragmentOffset());
|
||||
if (field.scoreOrdered()) {
|
||||
if (mapper.fieldType().stored()) {
|
||||
fragmentsBuilder = new XScoreOrderFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner);
|
||||
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner);
|
||||
} else {
|
||||
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);
|
||||
}
|
||||
|
@ -119,7 +119,7 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
// parameters to FVH are not requires since:
|
||||
// first two booleans are not relevant since they are set on the CustomFieldQuery (phrase and fieldMatch)
|
||||
// fragment builders are used explicitly
|
||||
cache.fvh = new org.apache.lucene.search.vectorhighlight.XFastVectorHighlighter();
|
||||
cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter();
|
||||
}
|
||||
CustomFieldQuery.highlightFilters.set(field.highlightFilter());
|
||||
if (field.requireFieldMatch()) {
|
||||
|
@ -158,16 +158,16 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
}
|
||||
|
||||
private class MapperHighlightEntry {
|
||||
public XFragListBuilder fragListBuilder;
|
||||
public XFragmentsBuilder fragmentsBuilder;
|
||||
public FragListBuilder fragListBuilder;
|
||||
public FragmentsBuilder fragmentsBuilder;
|
||||
|
||||
public org.apache.lucene.search.highlight.Highlighter highlighter;
|
||||
}
|
||||
|
||||
private class HighlighterEntry {
|
||||
public org.apache.lucene.search.vectorhighlight.XFastVectorHighlighter fvh;
|
||||
public XFieldQuery noFieldMatchFieldQuery;
|
||||
public XFieldQuery fieldMatchFieldQuery;
|
||||
public org.apache.lucene.search.vectorhighlight.FastVectorHighlighter fvh;
|
||||
public FieldQuery noFieldMatchFieldQuery;
|
||||
public FieldQuery fieldMatchFieldQuery;
|
||||
public Map<FieldMapper, MapperHighlightEntry> mappers = Maps.newHashMap();
|
||||
}
|
||||
|
||||
|
|
|
@ -22,10 +22,10 @@ package org.elasticsearch.search.highlight.vectorhighlight;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.search.vectorhighlight.XFastVectorHighlighter;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo.SubInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.XFragmentsBuilder;
|
||||
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
|
||||
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.index.analysis.*;
|
||||
|
@ -35,7 +35,7 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Simple helper class for {@link XFastVectorHighlighter} {@link XFragmentsBuilder} implementations.
|
||||
* Simple helper class for {@link FastVectorHighlighter} {@link FragmentsBuilder} implementations.
|
||||
*/
|
||||
public final class FragmentBuilderHelper {
|
||||
|
||||
|
@ -45,7 +45,7 @@ public final class FragmentBuilderHelper {
|
|||
|
||||
/**
|
||||
* Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to
|
||||
* {@link StringIndexOutOfBoundsException} in the {@link XFastVectorHighlighter}
|
||||
* {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}
|
||||
*/
|
||||
public static WeightedFragInfo fixWeightedFragInfo(FieldMapper<?> mapper, Field[] values, WeightedFragInfo fragInfo) {
|
||||
assert fragInfo != null : "FragInfo must not be null";
|
||||
|
|
|
@ -21,14 +21,14 @@ package org.elasticsearch.search.highlight.vectorhighlight;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.search.highlight.Encoder;
|
||||
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
|
||||
/**
|
||||
* Direct Subclass of Lucene's org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder
|
||||
* that corrects offsets for broken analysis chains.
|
||||
*/
|
||||
public class SimpleFragmentsBuilder extends org.apache.lucene.search.vectorhighlight.XSimpleFragmentsBuilder {
|
||||
public class SimpleFragmentsBuilder extends org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder {
|
||||
protected final FieldMapper<?> mapper;
|
||||
|
||||
public SimpleFragmentsBuilder(FieldMapper<?> mapper,
|
||||
|
|
|
@ -19,25 +19,32 @@
|
|||
|
||||
package org.elasticsearch.search.highlight.vectorhighlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenizerFactory;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.highlight.Encoder;
|
||||
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldFragList.WeightedFragInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.XScoreOrderFragmentsBuilder;
|
||||
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
|
||||
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
|
||||
import org.elasticsearch.index.analysis.CustomAnalyzer;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.search.lookup.SearchLookup;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SourceScoreOrderFragmentsBuilder extends XScoreOrderFragmentsBuilder {
|
||||
public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {
|
||||
|
||||
private final FieldMapper<?> mapper;
|
||||
|
||||
|
|
|
@ -1,87 +0,0 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenFilter;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.elasticsearch.test.integration.ElasticsearchLuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
public class XFastVectorHighlighterTest extends ElasticsearchLuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testLotsOfPhrases() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
|
||||
FieldType type = new FieldType(TextField.TYPE_STORED);
|
||||
type.setStoreTermVectorOffsets(true);
|
||||
type.setStoreTermVectorPositions(true);
|
||||
type.setStoreTermVectors(true);
|
||||
type.freeze();
|
||||
String[] terms = { "org", "apache", "lucene"};
|
||||
int iters = atLeast(1000);
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (int i = 0; i < iters; i++) {
|
||||
builder.append(terms[random().nextInt(terms.length)]).append(" ");
|
||||
if (random().nextInt(6) == 3) {
|
||||
builder.append("elasticsearch").append(" ");
|
||||
}
|
||||
}
|
||||
Document doc = new Document();
|
||||
Field field = new Field("field", builder.toString(), type);
|
||||
doc.add(field);
|
||||
writer.addDocument(doc);
|
||||
PhraseQuery query = new PhraseQuery();
|
||||
query.add(new Term("field", "org"));
|
||||
query.add(new Term("field", "apache"));
|
||||
query.add(new Term("field", "lucene"));
|
||||
|
||||
|
||||
XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
|
||||
IndexReader reader = DirectoryReader.open(writer, true);
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
TopDocs hits = searcher.search(query, 10);
|
||||
assertEquals(1, hits.totalHits);
|
||||
XFieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
|
||||
String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
|
||||
for (int i = 0; i < bestFragments.length; i++) {
|
||||
String result = bestFragments[i].replaceAll("<b>org apache lucene</b>", "FOOBAR");
|
||||
assertFalse(result.contains("org apache lucene"));
|
||||
}
|
||||
reader.close();
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@ -40,6 +40,7 @@ import org.elasticsearch.test.integration.AbstractSharedClusterTest;
|
|||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.elasticsearch.action.search.SearchType.QUERY_THEN_FETCH;
|
||||
import static org.elasticsearch.client.Requests.searchRequest;
|
||||
|
@ -1139,6 +1140,70 @@ public class HighlighterSearchTests extends AbstractSharedClusterTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDisableFastVectorHighlighter() throws Exception {
|
||||
client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2))
|
||||
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||
.startObject("title").field("type", "string").field("store", "yes").field("term_vector", "with_positions_offsets").endObject()
|
||||
.endObject().endObject().endObject())
|
||||
.execute().actionGet();
|
||||
ensureGreen();
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
client().prepareIndex("test", "type1", Integer.toString(i))
|
||||
.setSource("title", "This is a test for the workaround for the fast vector highlighting SOLR-3724").execute().actionGet();
|
||||
}
|
||||
refresh();
|
||||
SearchResponse search = client().prepareSearch()
|
||||
.setQuery(matchPhraseQuery("title", "test for the workaround"))
|
||||
.addHighlightedField("title", 50, 1, 10)
|
||||
.execute().actionGet();
|
||||
|
||||
assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0));
|
||||
|
||||
assertThat(search.getHits().totalHits(), equalTo(5l));
|
||||
assertThat(search.getHits().hits().length, equalTo(5));
|
||||
|
||||
for (SearchHit hit : search.getHits()) {
|
||||
// Because of SOLR-3724 nothing is highlighted when FVH is used
|
||||
assertThat(hit.highlightFields().isEmpty(), equalTo(true));
|
||||
}
|
||||
|
||||
// Using plain highlighter instead of FVH
|
||||
search = client().prepareSearch()
|
||||
.setQuery(matchPhraseQuery("title", "test for the workaround"))
|
||||
.addHighlightedField("title", 50, 1, 10)
|
||||
.setHighlighterType("highlighter")
|
||||
.execute().actionGet();
|
||||
|
||||
assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0));
|
||||
|
||||
assertThat(search.getHits().totalHits(), equalTo(5l));
|
||||
assertThat(search.getHits().hits().length, equalTo(5));
|
||||
|
||||
for (SearchHit hit : search.getHits()) {
|
||||
// With plain highlighter terms are highlighted correctly
|
||||
assertThat(hit.highlightFields().get("title").fragments()[0].string(), equalTo("This is a <em>test</em> for the <em>workaround</em> for the fast vector highlighting SOLR-3724"));
|
||||
}
|
||||
|
||||
// Using plain highlighter instead of FVH on the field level
|
||||
search = client().prepareSearch()
|
||||
.setQuery(matchPhraseQuery("title", "test for the workaround"))
|
||||
.addHighlightedField(new HighlightBuilder.Field("title").highlighterType("highlighter"))
|
||||
.setHighlighterType("highlighter")
|
||||
.execute().actionGet();
|
||||
|
||||
assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0));
|
||||
|
||||
assertThat(search.getHits().totalHits(), equalTo(5l));
|
||||
assertThat(search.getHits().hits().length, equalTo(5));
|
||||
|
||||
for (SearchHit hit : search.getHits()) {
|
||||
// With plain highlighter terms are highlighted correctly
|
||||
assertThat(hit.highlightFields().get("title").fragments()[0].string(), equalTo("This is a <em>test</em> for the <em>workaround</em> for the fast vector highlighting SOLR-3724"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFSHHighlightAllMvFragments() throws Exception {
|
||||
client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder()
|
||||
|
@ -1528,54 +1593,4 @@ public class HighlighterSearchTests extends AbstractSharedClusterTest {
|
|||
assertThat(response.getFailedShards(), equalTo(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHighlightComplexPhraseQuery() throws Exception {
|
||||
prepareCreate("test")
|
||||
.setSettings(ImmutableSettings.builder()
|
||||
.put("analysis.analyzer.code.type", "custom")
|
||||
.put("analysis.analyzer.code.tokenizer", "code")
|
||||
.put("analysis.analyzer.code.filter", "code,lowercase")
|
||||
.put("analysis.tokenizer.code.type", "pattern")
|
||||
.put("analysis.tokenizer.code.pattern", "[.,:;/\"<>(){}\\[\\]\\s]")
|
||||
.put("analysis.filter.code.type", "word_delimiter")
|
||||
.put("analysis.filter.code.generate_word_parts", "true")
|
||||
.put("analysis.filter.code.generate_number_parts", "true")
|
||||
.put("analysis.filter.code.catenate_words", "false")
|
||||
.put("analysis.filter.code.catenate_numbers", "false")
|
||||
.put("analysis.filter.code.catenate_all", "false")
|
||||
.put("analysis.filter.code.split_on_case_change", "true")
|
||||
.put("analysis.filter.code.preserve_original", "true")
|
||||
.put("analysis.filter.code.split_on_numerics", "true")
|
||||
.put("analysis.filter.code.stem_english_possessive", "false")
|
||||
.build())
|
||||
.addMapping("type", jsonBuilder()
|
||||
.startObject()
|
||||
.startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("text")
|
||||
.field("type", "string")
|
||||
.field("analyzer", "code")
|
||||
.field("term_vector", "with_positions_offsets")
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject())
|
||||
.execute().actionGet();
|
||||
|
||||
ensureGreen();
|
||||
client().prepareIndex("test", "type", "1")
|
||||
.setSource(jsonBuilder().startObject()
|
||||
.field("text", "def log_worker_status( worker )\n pass")
|
||||
.endObject())
|
||||
.setRefresh(true)
|
||||
.execute().actionGet();
|
||||
|
||||
SearchResponse response = client().prepareSearch("test")
|
||||
.setQuery(QueryBuilders.matchPhraseQuery("text", "def log_worker_status( worker )"))
|
||||
.addHighlightedField("text").execute().actionGet();
|
||||
assertThat(response.getFailedShards(), equalTo(0));
|
||||
assertThat(response.getHits().totalHits(), equalTo(1L));
|
||||
assertThat(response.getHits().getAt(0).getHighlightFields().get("text").fragments()[0].string(), equalTo("<em>def log_worker_status( worker</em> )\n pass"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.document.TextField;
|
|||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
|
||||
import org.apache.lucene.search.vectorhighlight.XFastVectorHighlighter;
|
||||
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
@ -55,7 +55,7 @@ public class VectorHighlighterTests {
|
|||
|
||||
assertThat(topDocs.totalHits, equalTo(1));
|
||||
|
||||
XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
|
||||
FastVectorHighlighter highlighter = new FastVectorHighlighter();
|
||||
String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
|
||||
reader, topDocs.scoreDocs[0].doc, "content", 30);
|
||||
assertThat(fragment, notNullValue());
|
||||
|
@ -78,7 +78,7 @@ public class VectorHighlighterTests {
|
|||
|
||||
assertThat(topDocs.totalHits, equalTo(1));
|
||||
|
||||
XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
|
||||
FastVectorHighlighter highlighter = new FastVectorHighlighter();
|
||||
|
||||
PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));
|
||||
assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
|
||||
|
@ -116,7 +116,7 @@ public class VectorHighlighterTests {
|
|||
|
||||
assertThat(topDocs.totalHits, equalTo(1));
|
||||
|
||||
XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
|
||||
FastVectorHighlighter highlighter = new FastVectorHighlighter();
|
||||
String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
|
||||
reader, topDocs.scoreDocs[0].doc, "content", 30);
|
||||
assertThat(fragment, nullValue());
|
||||
|
@ -138,7 +138,7 @@ public class VectorHighlighterTests {
|
|||
|
||||
assertThat(topDocs.totalHits, equalTo(1));
|
||||
|
||||
XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
|
||||
FastVectorHighlighter highlighter = new FastVectorHighlighter();
|
||||
String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
|
||||
reader, topDocs.scoreDocs[0].doc, "content", 30);
|
||||
assertThat(fragment, nullValue());
|
||||
|
|
Loading…
Reference in New Issue