mirror of https://github.com/apache/lucene.git
LUCENE-7559: fix indentation of entire file
This commit is contained in:
parent c51e89014a
commit cbc83929e7
@@ -24,115 +24,117 @@ package org.apache.lucene.search.uhighlight;
 * ellipses between unconnected passages.
 */
public class DefaultPassageFormatter extends PassageFormatter {
  /** text that will appear before highlighted terms */
  protected final String preTag;
  /** text that will appear after highlighted terms */
  protected final String postTag;
  /** text that will appear between two unconnected passages */
  protected final String ellipsis;
  /** true if we should escape for html */
  protected final boolean escape;

  /**
   * Creates a new DefaultPassageFormatter with the default tags.
   */
  public DefaultPassageFormatter() {
    this("<b>", "</b>", "... ", false);
  }

  /**
   * Creates a new DefaultPassageFormatter with custom tags.
   *
   * @param preTag text which should appear before a highlighted term.
   * @param postTag text which should appear after a highlighted term.
   * @param ellipsis text which should be used to connect two unconnected passages.
   * @param escape true if text should be html-escaped
   */
  public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
    if (preTag == null || postTag == null || ellipsis == null) {
      throw new NullPointerException();
    }
    this.preTag = preTag;
    this.postTag = postTag;
    this.ellipsis = ellipsis;
    this.escape = escape;
  }

  @Override
  public String format(Passage passages[], String content) {
    StringBuilder sb = new StringBuilder();
    int pos = 0;
    for (Passage passage : passages) {
      // don't add ellipsis if its the first one, or if its connected.
      if (passage.getStartOffset() > pos && pos > 0) {
        sb.append(ellipsis);
      }
      pos = passage.getStartOffset();
      for (int i = 0; i < passage.getNumMatches(); i++) {
        int start = passage.getMatchStarts()[i];
        int end = passage.getMatchEnds()[i];
        // its possible to have overlapping terms
        if (start > pos) {
          append(sb, content, pos, start);
        }
        if (end > pos) {
          sb.append(preTag);
          append(sb, content, Math.max(pos, start), end);
          sb.append(postTag);
          pos = end;
        }
      }
      // its possible a "term" from the analyzer could span a sentence boundary.
      append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
      pos = passage.getEndOffset();
    }
    return sb.toString();
  }

  /**
   * Appends original text to the response.
   *
   * @param dest resulting text, possibly transformed or encoded
   * @param content original text content
   * @param start index of the first character in content
   * @param end index of the character following the last character in content
   */
  protected void append(StringBuilder dest, String content, int start, int end) {
    if (escape) {
      // note: these are the rules from owasp.org
      for (int i = start; i < end; i++) {
        char ch = content.charAt(i);
        switch (ch) {
          case '&':
            dest.append("&amp;");
            break;
          case '<':
            dest.append("&lt;");
            break;
          case '>':
            dest.append("&gt;");
            break;
          case '"':
            dest.append("&quot;");
            break;
          case '\'':
            dest.append("&#x27;");
            break;
          case '/':
            dest.append("&#x2F;");
            break;
          default:
            if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
              dest.append(ch);
            } else if (ch < 0xff) {
              dest.append("&#");
              dest.append((int) ch);
              dest.append(";");
            } else {
              dest.append(ch);
            }
        }
      }
    } else {
      dest.append(content, start, end);
    }
  }
}
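For reference, the formatter above can be driven by hand. The following sketch is not part of this commit; it is a minimal, illustrative usage example. The import paths assume both classes live in org.apache.lucene.search.uhighlight (as the hunk header above indicates), the sample content, offsets, and class name are invented, and the Passage setters used here are marked @lucene.internal in the listing below (they are normally called by the highlighter itself).

import org.apache.lucene.search.uhighlight.DefaultPassageFormatter; // assumed package
import org.apache.lucene.search.uhighlight.Passage;                 // assumed package
import org.apache.lucene.util.BytesRef;

public class DefaultPassageFormatterSketch { // hypothetical class, for illustration only
  public static void main(String[] args) {
    String content = "Lucene is a search library.";

    // One passage spanning the whole sentence, with a single match on "Lucene".
    Passage passage = new Passage();
    passage.setStartOffset(0);
    passage.setEndOffset(content.length());
    passage.addMatch(0, "Lucene".length(), new BytesRef("Lucene"));

    // Bold tags, "... " between unconnected passages, no HTML escaping.
    DefaultPassageFormatter formatter =
        new DefaultPassageFormatter("<b>", "</b>", "... ", false);
    System.out.println(formatter.format(new Passage[] {passage}, content));
    // prints: <b>Lucene</b> is a search library.
  }
}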
@@ -23,7 +23,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Represents a passage (typically a sentence of the document).
 * <p>
 * A passage contains {@link #getNumMatches} highlights from the query,
 * and the offsets and query terms that correspond with each match.
@@ -31,149 +31,151 @@ import org.apache.lucene.util.RamUsageEstimator;
 * @lucene.experimental
 */
public class Passage {
  private int startOffset = -1;
  private int endOffset = -1;
  private float score = 0.0f;

  private int[] matchStarts = new int[8];
  private int[] matchEnds = new int[8];
  private BytesRef[] matchTerms = new BytesRef[8];
  private int numMatches = 0;

  /** @lucene.internal */
  public void addMatch(int startOffset, int endOffset, BytesRef term) {
    assert startOffset >= this.startOffset && startOffset <= this.endOffset;
    if (numMatches == matchStarts.length) {
      int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
      int newMatchStarts[] = new int[newLength];
      int newMatchEnds[] = new int[newLength];
      BytesRef newMatchTerms[] = new BytesRef[newLength];
      System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
      System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
      System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
      matchStarts = newMatchStarts;
      matchEnds = newMatchEnds;
      matchTerms = newMatchTerms;
    }
    assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
    matchStarts[numMatches] = startOffset;
    matchEnds[numMatches] = endOffset;
    matchTerms[numMatches] = term;
    numMatches++;
  }

  /** @lucene.internal */
  public void sort() {
    final int starts[] = matchStarts;
    final int ends[] = matchEnds;
    final BytesRef terms[] = matchTerms;
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        int temp = starts[i];
        starts[i] = starts[j];
        starts[j] = temp;

        temp = ends[i];
        ends[i] = ends[j];
        ends[j] = temp;

        BytesRef tempTerm = terms[i];
        terms[i] = terms[j];
        terms[j] = tempTerm;
      }

      @Override
      protected int compare(int i, int j) {
        return Integer.compare(starts[i], starts[j]);
      }

    }.sort(0, numMatches);
  }

  /** @lucene.internal */
  public void reset() {
    startOffset = endOffset = -1;
    score = 0.0f;
    numMatches = 0;
  }

  /**
   * Start offset of this passage.
   *
   * @return start index (inclusive) of the passage in the
   *         original content: always >= 0.
   */
  public int getStartOffset() {
    return startOffset;
  }

  /**
   * End offset of this passage.
   *
   * @return end index (exclusive) of the passage in the
   *         original content: always >= {@link #getStartOffset()}
   */
  public int getEndOffset() {
    return endOffset;
  }

  /**
   * Passage's score.
   */
  public float getScore() {
    return score;
  }

  /**
   * Number of term matches available in
   * {@link #getMatchStarts}, {@link #getMatchEnds},
   * {@link #getMatchTerms}
   */
  public int getNumMatches() {
    return numMatches;
  }

  /**
   * Start offsets of the term matches, in increasing order.
   * <p>
   * Only {@link #getNumMatches} are valid. Note that these
   * offsets are absolute (not relative to {@link #getStartOffset()}).
   */
  public int[] getMatchStarts() {
    return matchStarts;
  }

  /**
   * End offsets of the term matches, corresponding with {@link #getMatchStarts}.
   * <p>
   * Only {@link #getNumMatches} are valid. Note that its possible that an end offset
   * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
   * Analyzer produced a term which spans a passage boundary.
   */
  public int[] getMatchEnds() {
    return matchEnds;
  }

  /**
   * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
   * <p>
   * Only {@link #getNumMatches()} are valid.
   */
  public BytesRef[] getMatchTerms() {
    return matchTerms;
  }

  /** @lucene.internal */
  public void setStartOffset(int startOffset) {
    this.startOffset = startOffset;
  }

  /** @lucene.internal */
  public void setEndOffset(int endOffset) {
    this.endOffset = endOffset;
  }

  /** @lucene.internal */
  public void setScore(float score) {
    this.score = score;
  }
}
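As a companion to the listing above, the sketch below (again not part of this commit; offsets, terms, and the class name are invented for illustration) shows how addMatch() and sort() keep the parallel arrays matchStarts, matchEnds, and matchTerms aligned: matches may be recorded in any order, the backing arrays grow on demand via ArrayUtil.oversize, and sort() swaps all three arrays together by start offset.

import org.apache.lucene.search.uhighlight.Passage; // assumed package
import org.apache.lucene.util.BytesRef;

public class PassageSketch { // hypothetical class, for illustration only
  public static void main(String[] args) {
    Passage p = new Passage();
    p.setStartOffset(0);
    p.setEndOffset(40);

    // Matches recorded out of order; the arrays grow automatically if more than 8 are added.
    p.addMatch(20, 25, new BytesRef("query"));
    p.addMatch(5, 10, new BytesRef("term"));

    // sort() reorders starts, ends, and terms together, so index i stays consistent.
    p.sort();
    for (int i = 0; i < p.getNumMatches(); i++) {
      System.out.println(p.getMatchStarts()[i] + "-" + p.getMatchEnds()[i]
          + " " + p.getMatchTerms()[i].utf8ToString());
    }
    // prints: 5-10 term, then 20-25 query
  }
}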