diff --git a/src/main/java/org/elasticsearch/common/lucene/search/vectorhighlight/SimpleBoundaryScanner2.java b/src/main/java/org/elasticsearch/common/lucene/search/vectorhighlight/SimpleBoundaryScanner2.java deleted file mode 100644 index 92f73bf733a..00000000000 --- a/src/main/java/org/elasticsearch/common/lucene/search/vectorhighlight/SimpleBoundaryScanner2.java +++ /dev/null @@ -1,62 +0,0 @@ -package org.elasticsearch.common.lucene.search.vectorhighlight; - -import gnu.trove.set.hash.TCharHashSet; -import org.apache.lucene.search.vectorhighlight.BoundaryScanner; - -/** - * A copy of Lucene {@link org.apache.lucene.search.vectorhighlight.XSimpleBoundaryScanner}. - *
- * Uses specialized char set to lookup boundary, and fixes a problem with start offset in the - * beginning of the text: https://issues.apache.org/jira/browse/LUCENE-3697 (which has a problem - * with multiple empty fields to highlight...). - */ -public class SimpleBoundaryScanner2 implements BoundaryScanner { - - public static final int DEFAULT_MAX_SCAN = 20; - public static final char[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'}; - - public static final SimpleBoundaryScanner2 DEFAULT = new SimpleBoundaryScanner2(); - - public int maxScan; - public TCharHashSet boundaryChars; - - public SimpleBoundaryScanner2() { - this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS); - } - - public SimpleBoundaryScanner2(int maxScan, char[] boundaryChars) { - this.maxScan = maxScan; - this.boundaryChars = new TCharHashSet(boundaryChars); - } - - public int findStartOffset(StringBuilder buffer, int start) { - // avoid illegal start offset - if (start > buffer.length() || start < 1) return start; - int offset, count = maxScan; - for (offset = start; offset > 0 && count > 0; count--) { - // found? - if (boundaryChars.contains(buffer.charAt(offset - 1))) return offset; - offset--; - } - // LUCENE-3697 - if (offset == 0) { - return 0; - } - // not found - return start; - } - - public int findEndOffset(StringBuilder buffer, int start) { - // avoid illegal start offset - if (start > buffer.length() || start < 0) return start; - int offset, count = maxScan; - //for( offset = start; offset <= buffer.length() && count > 0; count-- ){ - for (offset = start; offset < buffer.length() && count > 0; count--) { - // found? - if (boundaryChars.contains(buffer.charAt(offset))) return offset; - offset++; - } - // not found - return start; - } -} diff --git a/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java b/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java index a1b4244f493..b48d4404611 100644 --- a/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java +++ b/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java @@ -25,7 +25,6 @@ import org.apache.lucene.search.highlight.SimpleHTMLEncoder; import org.apache.lucene.search.vectorhighlight.*; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.text.StringText; import org.elasticsearch.index.mapper.FieldMapper; @@ -43,6 +42,8 @@ import java.util.Map; */ public class FastVectorHighlighter implements Highlighter { + private static final SimpleBoundaryScanner DEFAULT_BOUNDARY_SCANNER = new SimpleBoundaryScanner(); + private static final String CACHE_KEY = "highlight-fsv"; private final Boolean termVectorMultiValue; @@ -53,7 +54,7 @@ public class FastVectorHighlighter implements Highlighter { @Override public String[] names() { - return new String[] { "fvh", "fast-vector-highlighter" }; + return new String[]{"fvh", "fast-vector-highlighter"}; } @Override @@ -81,9 +82,9 @@ public class FastVectorHighlighter implements Highlighter { XFragListBuilder fragListBuilder; XBaseFragmentsBuilder fragmentsBuilder; - BoundaryScanner boundaryScanner = SimpleBoundaryScanner2.DEFAULT; - if (field.boundaryMaxScan() != SimpleBoundaryScanner2.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS) { - boundaryScanner = new SimpleBoundaryScanner2(field.boundaryMaxScan(), field.boundaryChars()); + BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER; + if (field.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) { + boundaryScanner = new SimpleBoundaryScanner(field.boundaryMaxScan(), field.boundaryChars()); } if (field.numberOfFragments() == 0) { diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java b/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java index 13cd002217e..edceffc7cbc 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java @@ -20,7 +20,7 @@ package org.elasticsearch.search.highlight; import com.google.common.collect.Lists; -import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2; +import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchParseException; @@ -75,8 +75,8 @@ public class HighlighterParseElement implements SearchParseElement { int globalFragmentSize = 100; int globalNumOfFragments = 5; String globalEncoder = "default"; - int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN; - char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS; + int globalBoundaryMaxScan = SimpleBoundaryScanner.DEFAULT_MAX_SCAN; + Character[] globalBoundaryChars = SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS; String globalHighlighterType = null; String globalFragmenter = null; Map