diff --git a/src/main/java/org/elasticsearch/common/lucene/search/vectorhighlight/SimpleBoundaryScanner2.java b/src/main/java/org/elasticsearch/common/lucene/search/vectorhighlight/SimpleBoundaryScanner2.java deleted file mode 100644 index 92f73bf733a..00000000000 --- a/src/main/java/org/elasticsearch/common/lucene/search/vectorhighlight/SimpleBoundaryScanner2.java +++ /dev/null @@ -1,62 +0,0 @@ -package org.elasticsearch.common.lucene.search.vectorhighlight; - -import gnu.trove.set.hash.TCharHashSet; -import org.apache.lucene.search.vectorhighlight.BoundaryScanner; - -/** - * A copy of Lucene {@link org.apache.lucene.search.vectorhighlight.XSimpleBoundaryScanner}. - *

- * Uses specialized char set to lookup boundary, and fixes a problem with start offset in the - * beginning of the text: https://issues.apache.org/jira/browse/LUCENE-3697 (which has a problem - * with multiple empty fields to highlight...). - */ -public class SimpleBoundaryScanner2 implements BoundaryScanner { - - public static final int DEFAULT_MAX_SCAN = 20; - public static final char[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'}; - - public static final SimpleBoundaryScanner2 DEFAULT = new SimpleBoundaryScanner2(); - - public int maxScan; - public TCharHashSet boundaryChars; - - public SimpleBoundaryScanner2() { - this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS); - } - - public SimpleBoundaryScanner2(int maxScan, char[] boundaryChars) { - this.maxScan = maxScan; - this.boundaryChars = new TCharHashSet(boundaryChars); - } - - public int findStartOffset(StringBuilder buffer, int start) { - // avoid illegal start offset - if (start > buffer.length() || start < 1) return start; - int offset, count = maxScan; - for (offset = start; offset > 0 && count > 0; count--) { - // found? - if (boundaryChars.contains(buffer.charAt(offset - 1))) return offset; - offset--; - } - // LUCENE-3697 - if (offset == 0) { - return 0; - } - // not found - return start; - } - - public int findEndOffset(StringBuilder buffer, int start) { - // avoid illegal start offset - if (start > buffer.length() || start < 0) return start; - int offset, count = maxScan; - //for( offset = start; offset <= buffer.length() && count > 0; count-- ){ - for (offset = start; offset < buffer.length() && count > 0; count--) { - // found? - if (boundaryChars.contains(buffer.charAt(offset))) return offset; - offset++; - } - // not found - return start; - } -} diff --git a/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java b/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java index a1b4244f493..b48d4404611 100644 --- a/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java +++ b/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java @@ -25,7 +25,6 @@ import org.apache.lucene.search.highlight.SimpleHTMLEncoder; import org.apache.lucene.search.vectorhighlight.*; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.text.StringText; import org.elasticsearch.index.mapper.FieldMapper; @@ -43,6 +42,8 @@ import java.util.Map; */ public class FastVectorHighlighter implements Highlighter { + private static final SimpleBoundaryScanner DEFAULT_BOUNDARY_SCANNER = new SimpleBoundaryScanner(); + private static final String CACHE_KEY = "highlight-fsv"; private final Boolean termVectorMultiValue; @@ -53,7 +54,7 @@ public class FastVectorHighlighter implements Highlighter { @Override public String[] names() { - return new String[] { "fvh", "fast-vector-highlighter" }; + return new String[]{"fvh", "fast-vector-highlighter"}; } @Override @@ -81,9 +82,9 @@ public class FastVectorHighlighter implements Highlighter { XFragListBuilder fragListBuilder; XBaseFragmentsBuilder fragmentsBuilder; - BoundaryScanner boundaryScanner = SimpleBoundaryScanner2.DEFAULT; - if (field.boundaryMaxScan() != SimpleBoundaryScanner2.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS) { - boundaryScanner = new SimpleBoundaryScanner2(field.boundaryMaxScan(), field.boundaryChars()); + BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER; + if (field.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) { + boundaryScanner = new SimpleBoundaryScanner(field.boundaryMaxScan(), field.boundaryChars()); } if (field.numberOfFragments() == 0) { diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java b/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java index 13cd002217e..edceffc7cbc 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java @@ -20,7 +20,7 @@ package org.elasticsearch.search.highlight; import com.google.common.collect.Lists; -import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2; +import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchParseException; @@ -75,8 +75,8 @@ public class HighlighterParseElement implements SearchParseElement { int globalFragmentSize = 100; int globalNumOfFragments = 5; String globalEncoder = "default"; - int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN; - char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS; + int globalBoundaryMaxScan = SimpleBoundaryScanner.DEFAULT_MAX_SCAN; + Character[] globalBoundaryChars = SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS; String globalHighlighterType = null; String globalFragmenter = null; Map globalOptions = null; @@ -120,7 +120,11 @@ public class HighlighterParseElement implements SearchParseElement { } else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) { globalBoundaryMaxScan = parser.intValue(); } else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) { - globalBoundaryChars = parser.text().toCharArray(); + char[] charsArr = parser.text().toCharArray(); + globalBoundaryChars = new Character[charsArr.length]; + for (int i = 0; i < charsArr.length; i++) { + globalBoundaryChars[i] = charsArr[i]; + } } else if ("type".equals(topLevelFieldName)) { globalHighlighterType = parser.text(); } else if ("fragmenter".equals(topLevelFieldName)) { @@ -170,7 +174,12 @@ public class HighlighterParseElement implements SearchParseElement { } else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) { field.boundaryMaxScan(parser.intValue()); } else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) { - field.boundaryChars(parser.text().toCharArray()); + char[] charsArr = parser.text().toCharArray(); + Character[] boundaryChars = new Character[charsArr.length]; + for (int i = 0; i < charsArr.length; i++) { + boundaryChars[i] = charsArr[i]; + } + field.boundaryChars(boundaryChars); } else if ("type".equals(fieldName)) { field.highlighterType(parser.text()); } else if ("fragmenter".equals(fieldName)) { diff --git a/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java b/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java index 0bc1ffc434d..04e71d238c2 100644 --- a/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java +++ b/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java @@ -64,7 +64,7 @@ public class SearchContextHighlight { private String fragmenter; private int boundaryMaxScan = -1; - private char[] boundaryChars = null; + private Character[] boundaryChars = null; private Map options; @@ -172,11 +172,11 @@ public class SearchContextHighlight { this.boundaryMaxScan = boundaryMaxScan; } - public char[] boundaryChars() { + public Character[] boundaryChars() { return boundaryChars; } - public void boundaryChars(char[] boundaryChars) { + public void boundaryChars(Character[] boundaryChars) { this.boundaryChars = boundaryChars; }