diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java index bc27a435a47..62d58df70f0 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java @@ -24,115 +24,117 @@ package org.apache.lucene.search.uhighlight; * ellipses between unconnected passages. */ public class DefaultPassageFormatter extends PassageFormatter { - /** text that will appear before highlighted terms */ - protected final String preTag; - /** text that will appear after highlighted terms */ - protected final String postTag; - /** text that will appear between two unconnected passages */ - protected final String ellipsis; - /** true if we should escape for html */ - protected final boolean escape; + /** text that will appear before highlighted terms */ + protected final String preTag; + /** text that will appear after highlighted terms */ + protected final String postTag; + /** text that will appear between two unconnected passages */ + protected final String ellipsis; + /** true if we should escape for html */ + protected final boolean escape; - /** - * Creates a new DefaultPassageFormatter with the default tags. - */ - public DefaultPassageFormatter() { - this("", "", "... ", false); + /** + * Creates a new DefaultPassageFormatter with the default tags. + */ + public DefaultPassageFormatter() { + this("", "", "... ", false); + } + + /** + * Creates a new DefaultPassageFormatter with custom tags. + * + * @param preTag text which should appear before a highlighted term. + * @param postTag text which should appear after a highlighted term. + * @param ellipsis text which should be used to connect two unconnected passages. + * @param escape true if text should be html-escaped + */ + public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) { + if (preTag == null || postTag == null || ellipsis == null) { + throw new NullPointerException(); } + this.preTag = preTag; + this.postTag = postTag; + this.ellipsis = ellipsis; + this.escape = escape; + } - /** - * Creates a new DefaultPassageFormatter with custom tags. - * @param preTag text which should appear before a highlighted term. - * @param postTag text which should appear after a highlighted term. - * @param ellipsis text which should be used to connect two unconnected passages. - * @param escape true if text should be html-escaped - */ - public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) { - if (preTag == null || postTag == null || ellipsis == null) { - throw new NullPointerException(); + @Override + public String format(Passage passages[], String content) { + StringBuilder sb = new StringBuilder(); + int pos = 0; + for (Passage passage : passages) { + // don't add ellipsis if its the first one, or if its connected. + if (passage.getStartOffset() > pos && pos > 0) { + sb.append(ellipsis); + } + pos = passage.getStartOffset(); + for (int i = 0; i < passage.getNumMatches(); i++) { + int start = passage.getMatchStarts()[i]; + int end = passage.getMatchEnds()[i]; + // its possible to have overlapping terms + if (start > pos) { + append(sb, content, pos, start); } - this.preTag = preTag; - this.postTag = postTag; - this.ellipsis = ellipsis; - this.escape = escape; - } - - @Override - public String format(Passage passages[], String content) { - StringBuilder sb = new StringBuilder(); - int pos = 0; - for (Passage passage : passages) { - // don't add ellipsis if its the first one, or if its connected. - if (passage.getStartOffset() > pos && pos > 0) { - sb.append(ellipsis); - } - pos = passage.getStartOffset(); - for (int i = 0; i < passage.getNumMatches(); i++) { - int start = passage.getMatchStarts()[i]; - int end = passage.getMatchEnds()[i]; - // its possible to have overlapping terms - if (start > pos) { - append(sb, content, pos, start); - } - if (end > pos) { - sb.append(preTag); - append(sb, content, Math.max(pos, start), end); - sb.append(postTag); - pos = end; - } - } - // its possible a "term" from the analyzer could span a sentence boundary. - append(sb, content, pos, Math.max(pos, passage.getEndOffset())); - pos = passage.getEndOffset(); + if (end > pos) { + sb.append(preTag); + append(sb, content, Math.max(pos, start), end); + sb.append(postTag); + pos = end; } - return sb.toString(); + } + // its possible a "term" from the analyzer could span a sentence boundary. + append(sb, content, pos, Math.max(pos, passage.getEndOffset())); + pos = passage.getEndOffset(); } + return sb.toString(); + } - /** - * Appends original text to the response. - * @param dest resulting text, possibly transformed or encoded - * @param content original text content - * @param start index of the first character in content - * @param end index of the character following the last character in content - */ - protected void append(StringBuilder dest, String content, int start, int end) { - if (escape) { - // note: these are the rules from owasp.org - for (int i = start; i < end; i++) { - char ch = content.charAt(i); - switch(ch) { - case '&': - dest.append("&"); - break; - case '<': - dest.append("<"); - break; - case '>': - dest.append(">"); - break; - case '"': - dest.append("""); - break; - case '\'': - dest.append("'"); - break; - case '/': - dest.append("/"); - break; - default: - if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) { - dest.append(ch); - } else if (ch < 0xff) { - dest.append(""); - dest.append((int)ch); - dest.append(";"); - } else { - dest.append(ch); - } - } + /** + * Appends original text to the response. + * + * @param dest resulting text, possibly transformed or encoded + * @param content original text content + * @param start index of the first character in content + * @param end index of the character following the last character in content + */ + protected void append(StringBuilder dest, String content, int start, int end) { + if (escape) { + // note: these are the rules from owasp.org + for (int i = start; i < end; i++) { + char ch = content.charAt(i); + switch (ch) { + case '&': + dest.append("&"); + break; + case '<': + dest.append("<"); + break; + case '>': + dest.append(">"); + break; + case '"': + dest.append("""); + break; + case '\'': + dest.append("'"); + break; + case '/': + dest.append("/"); + break; + default: + if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) { + dest.append(ch); + } else if (ch < 0xff) { + dest.append(""); + dest.append((int) ch); + dest.append(";"); + } else { + dest.append(ch); } - } else { - dest.append(content, start, end); } + } + } else { + dest.append(content, start, end); } + } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java index a131d86d81f..d64b96e5e8a 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java @@ -23,7 +23,7 @@ import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.RamUsageEstimator; /** - * Represents a passage (typically a sentence of the document). + * Represents a passage (typically a sentence of the document). *
* A passage contains {@link #getNumMatches} highlights from the query, * and the offsets and query terms that correspond with each match. @@ -31,149 +31,151 @@ import org.apache.lucene.util.RamUsageEstimator; * @lucene.experimental */ public class Passage { - private int startOffset = -1; - private int endOffset = -1; - private float score = 0.0f; + private int startOffset = -1; + private int endOffset = -1; + private float score = 0.0f; - private int[] matchStarts = new int[8]; - private int[] matchEnds = new int[8]; - private BytesRef[] matchTerms = new BytesRef[8]; - private int numMatches = 0; + private int[] matchStarts = new int[8]; + private int[] matchEnds = new int[8]; + private BytesRef[] matchTerms = new BytesRef[8]; + private int numMatches = 0; - /** @lucene.internal */ - public void addMatch(int startOffset, int endOffset, BytesRef term) { - assert startOffset >= this.startOffset && startOffset <= this.endOffset; - if (numMatches == matchStarts.length) { - int newLength = ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); - int newMatchStarts[] = new int[newLength]; - int newMatchEnds[] = new int[newLength]; - BytesRef newMatchTerms[] = new BytesRef[newLength]; - System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches); - System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches); - System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches); - matchStarts = newMatchStarts; - matchEnds = newMatchEnds; - matchTerms = newMatchTerms; - } - assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length; - matchStarts[numMatches] = startOffset; - matchEnds[numMatches] = endOffset; - matchTerms[numMatches] = term; - numMatches++; + /** @lucene.internal */ + public void addMatch(int startOffset, int endOffset, BytesRef term) { + assert startOffset >= this.startOffset && startOffset <= this.endOffset; + if (numMatches == matchStarts.length) { + int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + int newMatchStarts[] = new int[newLength]; + int newMatchEnds[] = new int[newLength]; + BytesRef newMatchTerms[] = new BytesRef[newLength]; + System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches); + System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches); + System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches); + matchStarts = newMatchStarts; + matchEnds = newMatchEnds; + matchTerms = newMatchTerms; } + assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length; + matchStarts[numMatches] = startOffset; + matchEnds[numMatches] = endOffset; + matchTerms[numMatches] = term; + numMatches++; + } - /** @lucene.internal */ - public void sort() { - final int starts[] = matchStarts; - final int ends[] = matchEnds; - final BytesRef terms[] = matchTerms; - new InPlaceMergeSorter() { - @Override - protected void swap(int i, int j) { - int temp = starts[i]; - starts[i] = starts[j]; - starts[j] = temp; + /** @lucene.internal */ + public void sort() { + final int starts[] = matchStarts; + final int ends[] = matchEnds; + final BytesRef terms[] = matchTerms; + new InPlaceMergeSorter() { + @Override + protected void swap(int i, int j) { + int temp = starts[i]; + starts[i] = starts[j]; + starts[j] = temp; - temp = ends[i]; - ends[i] = ends[j]; - ends[j] = temp; + temp = ends[i]; + ends[i] = ends[j]; + ends[j] = temp; - BytesRef tempTerm = terms[i]; - terms[i] = terms[j]; - terms[j] = tempTerm; - } + BytesRef tempTerm = terms[i]; + terms[i] = terms[j]; + terms[j] = tempTerm; + } - @Override - protected int compare(int i, int j) { - return Integer.compare(starts[i], starts[j]); - } + @Override + protected int compare(int i, int j) { + return Integer.compare(starts[i], starts[j]); + } - }.sort(0, numMatches); - } + }.sort(0, numMatches); + } - /** @lucene.internal */ - public void reset() { - startOffset = endOffset = -1; - score = 0.0f; - numMatches = 0; - } + /** @lucene.internal */ + public void reset() { + startOffset = endOffset = -1; + score = 0.0f; + numMatches = 0; + } - /** - * Start offset of this passage. - * @return start index (inclusive) of the passage in the - * original content: always >= 0. - */ - public int getStartOffset() { - return startOffset; - } + /** + * Start offset of this passage. + * + * @return start index (inclusive) of the passage in the + * original content: always >= 0. + */ + public int getStartOffset() { + return startOffset; + } - /** - * End offset of this passage. - * @return end index (exclusive) of the passage in the - * original content: always >= {@link #getStartOffset()} - */ - public int getEndOffset() { - return endOffset; - } + /** + * End offset of this passage. + * + * @return end index (exclusive) of the passage in the + * original content: always >= {@link #getStartOffset()} + */ + public int getEndOffset() { + return endOffset; + } - /** - * Passage's score. - */ - public float getScore() { - return score; - } + /** + * Passage's score. + */ + public float getScore() { + return score; + } - /** - * Number of term matches available in - * {@link #getMatchStarts}, {@link #getMatchEnds}, - * {@link #getMatchTerms} - */ - public int getNumMatches() { - return numMatches; - } + /** + * Number of term matches available in + * {@link #getMatchStarts}, {@link #getMatchEnds}, + * {@link #getMatchTerms} + */ + public int getNumMatches() { + return numMatches; + } - /** - * Start offsets of the term matches, in increasing order. - *
- * Only {@link #getNumMatches} are valid. Note that these - * offsets are absolute (not relative to {@link #getStartOffset()}). - */ - public int[] getMatchStarts() { - return matchStarts; - } + /** + * Start offsets of the term matches, in increasing order. + *
+ * Only {@link #getNumMatches} are valid. Note that these + * offsets are absolute (not relative to {@link #getStartOffset()}). + */ + public int[] getMatchStarts() { + return matchStarts; + } - /** - * End offsets of the term matches, corresponding with {@link #getMatchStarts}. - *
- * Only {@link #getNumMatches} are valid. Note that its possible that an end offset - * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the - * Analyzer produced a term which spans a passage boundary. - */ - public int[] getMatchEnds() { - return matchEnds; - } + /** + * End offsets of the term matches, corresponding with {@link #getMatchStarts}. + *
+ * Only {@link #getNumMatches} are valid. Note that its possible that an end offset + * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the + * Analyzer produced a term which spans a passage boundary. + */ + public int[] getMatchEnds() { + return matchEnds; + } - /** - * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}. - *
- * Only {@link #getNumMatches()} are valid. - */ - public BytesRef[] getMatchTerms() { - return matchTerms; - } + /** + * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}. + *
+ * Only {@link #getNumMatches()} are valid. + */ + public BytesRef[] getMatchTerms() { + return matchTerms; + } - /** @lucene.internal */ - public void setStartOffset(int startOffset) { - this.startOffset = startOffset; - } + /** @lucene.internal */ + public void setStartOffset(int startOffset) { + this.startOffset = startOffset; + } - /** @lucene.internal */ - public void setEndOffset(int endOffset) { - this.endOffset = endOffset; - } + /** @lucene.internal */ + public void setEndOffset(int endOffset) { + this.endOffset = endOffset; + } - /** @lucene.internal */ - public void setScore(float score) { - this.score = score; - } + /** @lucene.internal */ + public void setScore(float score) { + this.score = score; + } }