Remove our own version of the highlighting boundary scanner; the bug we fixed in it has already been pushed to the Lucene version.
This commit is contained in:
parent
fadb0c8f32
commit
7fd54acd66
|
@ -1,62 +0,0 @@
|
|||
package org.elasticsearch.common.lucene.search.vectorhighlight;
|
||||
|
||||
import gnu.trove.set.hash.TCharHashSet;
|
||||
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
|
||||
|
||||
/**
|
||||
* A copy of Lucene {@link org.apache.lucene.search.vectorhighlight.XSimpleBoundaryScanner}.
|
||||
* <p/>
|
||||
* Uses specialized char set to lookup boundary, and fixes a problem with start offset in the
|
||||
* beginning of the text: https://issues.apache.org/jira/browse/LUCENE-3697 (which has a problem
|
||||
* with multiple empty fields to highlight...).
|
||||
*/
|
||||
public class SimpleBoundaryScanner2 implements BoundaryScanner {
|
||||
|
||||
public static final int DEFAULT_MAX_SCAN = 20;
|
||||
public static final char[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'};
|
||||
|
||||
public static final SimpleBoundaryScanner2 DEFAULT = new SimpleBoundaryScanner2();
|
||||
|
||||
public int maxScan;
|
||||
public TCharHashSet boundaryChars;
|
||||
|
||||
public SimpleBoundaryScanner2() {
|
||||
this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS);
|
||||
}
|
||||
|
||||
public SimpleBoundaryScanner2(int maxScan, char[] boundaryChars) {
|
||||
this.maxScan = maxScan;
|
||||
this.boundaryChars = new TCharHashSet(boundaryChars);
|
||||
}
|
||||
|
||||
public int findStartOffset(StringBuilder buffer, int start) {
|
||||
// avoid illegal start offset
|
||||
if (start > buffer.length() || start < 1) return start;
|
||||
int offset, count = maxScan;
|
||||
for (offset = start; offset > 0 && count > 0; count--) {
|
||||
// found?
|
||||
if (boundaryChars.contains(buffer.charAt(offset - 1))) return offset;
|
||||
offset--;
|
||||
}
|
||||
// LUCENE-3697
|
||||
if (offset == 0) {
|
||||
return 0;
|
||||
}
|
||||
// not found
|
||||
return start;
|
||||
}
|
||||
|
||||
public int findEndOffset(StringBuilder buffer, int start) {
|
||||
// avoid illegal start offset
|
||||
if (start > buffer.length() || start < 0) return start;
|
||||
int offset, count = maxScan;
|
||||
//for( offset = start; offset <= buffer.length() && count > 0; count-- ){
|
||||
for (offset = start; offset < buffer.length() && count > 0; count--) {
|
||||
// found?
|
||||
if (boundaryChars.contains(buffer.charAt(offset))) return offset;
|
||||
offset++;
|
||||
}
|
||||
// not found
|
||||
return start;
|
||||
}
|
||||
}
|
|
@ -25,7 +25,6 @@ import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
|
|||
import org.apache.lucene.search.vectorhighlight.*;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.text.StringText;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
|
@ -43,6 +42,8 @@ import java.util.Map;
|
|||
*/
|
||||
public class FastVectorHighlighter implements Highlighter {
|
||||
|
||||
private static final SimpleBoundaryScanner DEFAULT_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
|
||||
|
||||
private static final String CACHE_KEY = "highlight-fsv";
|
||||
private final Boolean termVectorMultiValue;
|
||||
|
||||
|
@ -81,9 +82,9 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
XFragListBuilder fragListBuilder;
|
||||
XBaseFragmentsBuilder fragmentsBuilder;
|
||||
|
||||
BoundaryScanner boundaryScanner = SimpleBoundaryScanner2.DEFAULT;
|
||||
if (field.boundaryMaxScan() != SimpleBoundaryScanner2.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS) {
|
||||
boundaryScanner = new SimpleBoundaryScanner2(field.boundaryMaxScan(), field.boundaryChars());
|
||||
BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER;
|
||||
if (field.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
|
||||
boundaryScanner = new SimpleBoundaryScanner(field.boundaryMaxScan(), field.boundaryChars());
|
||||
}
|
||||
|
||||
if (field.numberOfFragments() == 0) {
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
package org.elasticsearch.search.highlight;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2;
|
||||
import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.search.SearchParseElement;
|
||||
import org.elasticsearch.search.SearchParseException;
|
||||
|
@ -75,8 +75,8 @@ public class HighlighterParseElement implements SearchParseElement {
|
|||
int globalFragmentSize = 100;
|
||||
int globalNumOfFragments = 5;
|
||||
String globalEncoder = "default";
|
||||
int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN;
|
||||
char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS;
|
||||
int globalBoundaryMaxScan = SimpleBoundaryScanner.DEFAULT_MAX_SCAN;
|
||||
Character[] globalBoundaryChars = SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS;
|
||||
String globalHighlighterType = null;
|
||||
String globalFragmenter = null;
|
||||
Map<String, Object> globalOptions = null;
|
||||
|
@ -120,7 +120,11 @@ public class HighlighterParseElement implements SearchParseElement {
|
|||
} else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) {
|
||||
globalBoundaryMaxScan = parser.intValue();
|
||||
} else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) {
|
||||
globalBoundaryChars = parser.text().toCharArray();
|
||||
char[] charsArr = parser.text().toCharArray();
|
||||
globalBoundaryChars = new Character[charsArr.length];
|
||||
for (int i = 0; i < charsArr.length; i++) {
|
||||
globalBoundaryChars[i] = charsArr[i];
|
||||
}
|
||||
} else if ("type".equals(topLevelFieldName)) {
|
||||
globalHighlighterType = parser.text();
|
||||
} else if ("fragmenter".equals(topLevelFieldName)) {
|
||||
|
@ -170,7 +174,12 @@ public class HighlighterParseElement implements SearchParseElement {
|
|||
} else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) {
|
||||
field.boundaryMaxScan(parser.intValue());
|
||||
} else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) {
|
||||
field.boundaryChars(parser.text().toCharArray());
|
||||
char[] charsArr = parser.text().toCharArray();
|
||||
Character[] boundaryChars = new Character[charsArr.length];
|
||||
for (int i = 0; i < charsArr.length; i++) {
|
||||
boundaryChars[i] = charsArr[i];
|
||||
}
|
||||
field.boundaryChars(boundaryChars);
|
||||
} else if ("type".equals(fieldName)) {
|
||||
field.highlighterType(parser.text());
|
||||
} else if ("fragmenter".equals(fieldName)) {
|
||||
|
|
|
@ -64,7 +64,7 @@ public class SearchContextHighlight {
|
|||
private String fragmenter;
|
||||
|
||||
private int boundaryMaxScan = -1;
|
||||
private char[] boundaryChars = null;
|
||||
private Character[] boundaryChars = null;
|
||||
|
||||
private Map<String, Object> options;
|
||||
|
||||
|
@ -172,11 +172,11 @@ public class SearchContextHighlight {
|
|||
this.boundaryMaxScan = boundaryMaxScan;
|
||||
}
|
||||
|
||||
public char[] boundaryChars() {
|
||||
public Character[] boundaryChars() {
|
||||
return boundaryChars;
|
||||
}
|
||||
|
||||
public void boundaryChars(char[] boundaryChars) {
|
||||
public void boundaryChars(Character[] boundaryChars) {
|
||||
this.boundaryChars = boundaryChars;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue