Remove our own version of the highlighting boundary scanner

The bug that we fixed in it has already been pushed to the Lucene version.
This commit is contained in:
Shay Banon 2013-07-29 13:36:21 +02:00
parent fadb0c8f32
commit 7fd54acd66
4 changed files with 23 additions and 75 deletions

View File

@ -1,62 +0,0 @@
package org.elasticsearch.common.lucene.search.vectorhighlight;
import gnu.trove.set.hash.TCharHashSet;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
/**
 * A copy of Lucene {@link org.apache.lucene.search.vectorhighlight.XSimpleBoundaryScanner}.
 * <p/>
 * Boundary characters are kept in a specialized char set for lookup. This copy also
 * addresses the start-offset problem at the beginning of the text described in
 * https://issues.apache.org/jira/browse/LUCENE-3697 (which has a problem with multiple
 * empty fields to highlight...).
 */
public class SimpleBoundaryScanner2 implements BoundaryScanner {

    /** Scan budget used by the no-argument constructor. */
    public static final int DEFAULT_MAX_SCAN = 20;

    /** Boundary characters used by the no-argument constructor. */
    public static final char[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'};

    /** Shared instance built with the default scan budget and boundary characters. */
    public static final SimpleBoundaryScanner2 DEFAULT = new SimpleBoundaryScanner2();

    /** Maximum number of characters inspected by a single scan. */
    public int maxScan;

    /** Characters that are treated as a boundary. */
    public TCharHashSet boundaryChars;

    /**
     * Creates a scanner using {@link #DEFAULT_MAX_SCAN} and {@link #DEFAULT_BOUNDARY_CHARS}.
     */
    public SimpleBoundaryScanner2() {
        this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS);
    }

    /**
     * Creates a scanner with an explicit scan budget and boundary characters.
     *
     * @param maxScan       maximum number of characters inspected per scan
     * @param boundaryChars characters treated as a boundary; they are loaded into a new char set
     */
    public SimpleBoundaryScanner2(int maxScan, char[] boundaryChars) {
        this.maxScan = maxScan;
        this.boundaryChars = new TCharHashSet(boundaryChars);
    }

    /**
     * Walks backwards from {@code start} looking for a boundary character.
     *
     * @param buffer text to scan
     * @param start  offset the backwards scan begins at
     * @return the offset just after the nearest preceding boundary character; {@code 0} when
     *         the scan reaches the beginning of the text; otherwise {@code start} (including
     *         when {@code start} is below 1 or beyond the buffer length)
     */
    public int findStartOffset(StringBuilder buffer, int start) {
        // An out-of-range start is handed back untouched.
        if (start > buffer.length() || start < 1) {
            return start;
        }

        int position = start;
        int remaining = maxScan;
        while (position > 0 && remaining > 0) {
            // The character immediately before the current position decides the match.
            if (boundaryChars.contains(buffer.charAt(position - 1))) {
                return position;
            }
            position--;
            remaining--;
        }

        // LUCENE-3697: reaching the very beginning of the text counts as a boundary;
        // in every other case nothing was found and the caller's offset is kept.
        return position == 0 ? 0 : start;
    }

    /**
     * Walks forwards from {@code start} looking for a boundary character.
     *
     * @param buffer text to scan
     * @param start  offset the forwards scan begins at
     * @return the offset of the first boundary character found within the scan budget;
     *         otherwise {@code start} (including when {@code start} is negative or beyond
     *         the buffer length)
     */
    public int findEndOffset(StringBuilder buffer, int start) {
        // An out-of-range start is handed back untouched.
        if (start > buffer.length() || start < 0) {
            return start;
        }

        int position = start;
        int remaining = maxScan;
        while (position < buffer.length() && remaining > 0) {
            if (boundaryChars.contains(buffer.charAt(position))) {
                return position;
            }
            position++;
            remaining--;
        }

        // No boundary within reach: keep the caller's offset.
        return start;
    }
}

View File

@ -25,7 +25,6 @@ import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.vectorhighlight.*;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.index.mapper.FieldMapper;
@ -43,6 +42,8 @@ import java.util.Map;
*/
public class FastVectorHighlighter implements Highlighter {
private static final SimpleBoundaryScanner DEFAULT_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
private static final String CACHE_KEY = "highlight-fsv";
private final Boolean termVectorMultiValue;
@ -81,9 +82,9 @@ public class FastVectorHighlighter implements Highlighter {
XFragListBuilder fragListBuilder;
XBaseFragmentsBuilder fragmentsBuilder;
BoundaryScanner boundaryScanner = SimpleBoundaryScanner2.DEFAULT;
if (field.boundaryMaxScan() != SimpleBoundaryScanner2.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS) {
boundaryScanner = new SimpleBoundaryScanner2(field.boundaryMaxScan(), field.boundaryChars());
BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER;
if (field.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
boundaryScanner = new SimpleBoundaryScanner(field.boundaryMaxScan(), field.boundaryChars());
}
if (field.numberOfFragments() == 0) {

View File

@ -20,7 +20,7 @@
package org.elasticsearch.search.highlight;
import com.google.common.collect.Lists;
import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2;
import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchParseException;
@ -75,8 +75,8 @@ public class HighlighterParseElement implements SearchParseElement {
int globalFragmentSize = 100;
int globalNumOfFragments = 5;
String globalEncoder = "default";
int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN;
char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS;
int globalBoundaryMaxScan = SimpleBoundaryScanner.DEFAULT_MAX_SCAN;
Character[] globalBoundaryChars = SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS;
String globalHighlighterType = null;
String globalFragmenter = null;
Map<String, Object> globalOptions = null;
@ -120,7 +120,11 @@ public class HighlighterParseElement implements SearchParseElement {
} else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) {
globalBoundaryMaxScan = parser.intValue();
} else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) {
globalBoundaryChars = parser.text().toCharArray();
char[] charsArr = parser.text().toCharArray();
globalBoundaryChars = new Character[charsArr.length];
for (int i = 0; i < charsArr.length; i++) {
globalBoundaryChars[i] = charsArr[i];
}
} else if ("type".equals(topLevelFieldName)) {
globalHighlighterType = parser.text();
} else if ("fragmenter".equals(topLevelFieldName)) {
@ -170,7 +174,12 @@ public class HighlighterParseElement implements SearchParseElement {
} else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) {
field.boundaryMaxScan(parser.intValue());
} else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) {
field.boundaryChars(parser.text().toCharArray());
char[] charsArr = parser.text().toCharArray();
Character[] boundaryChars = new Character[charsArr.length];
for (int i = 0; i < charsArr.length; i++) {
boundaryChars[i] = charsArr[i];
}
field.boundaryChars(boundaryChars);
} else if ("type".equals(fieldName)) {
field.highlighterType(parser.text());
} else if ("fragmenter".equals(fieldName)) {

View File

@ -64,7 +64,7 @@ public class SearchContextHighlight {
private String fragmenter;
private int boundaryMaxScan = -1;
private char[] boundaryChars = null;
private Character[] boundaryChars = null;
private Map<String, Object> options;
@ -172,11 +172,11 @@ public class SearchContextHighlight {
this.boundaryMaxScan = boundaryMaxScan;
}
public char[] boundaryChars() {
public Character[] boundaryChars() {
return boundaryChars;
}
public void boundaryChars(char[] boundaryChars) {
public void boundaryChars(Character[] boundaryChars) {
this.boundaryChars = boundaryChars;
}