mirror of https://github.com/apache/lucene.git
LUCENE-5191: Fix Unicode corruption in HTML escaping of Standard Highlighter and Fast Vector Highlighter.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1518839 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6f0040797e
commit
dc0dca5172
|
@ -152,6 +152,11 @@ Bug Fixes
|
|||
* LUCENE-5192: IndexWriter could allow adding same field name with different
|
||||
DocValueTypes under some circumstances. (Shai Erera)
|
||||
|
||||
* LUCENE-5191: SimpleHTMLEncoder in Highlighter module broke Unicode
|
||||
outside BMP because it encoded UTF-16 chars instead of codepoints.
|
||||
The escaping of codepoints > 127 was removed (not needed for valid HTML)
|
||||
and missing escaping for ' and / was added. (Uwe Schindler)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
||||
|
|
|
@ -47,34 +47,28 @@ public class SimpleHTMLEncoder implements Encoder
|
|||
{
|
||||
char ch = plainText.charAt(index);
|
||||
|
||||
switch (ch)
|
||||
{
|
||||
switch (ch) {
|
||||
case '"':
|
||||
result.append("&quot;");
|
||||
break;
|
||||
|
||||
case '&':
|
||||
result.append("&amp;");
|
||||
break;
|
||||
|
||||
case '<':
|
||||
result.append("&lt;");
|
||||
break;
|
||||
|
||||
case '>':
|
||||
result.append("&gt;");
|
||||
break;
|
||||
|
||||
case '\'':
|
||||
result.append("&#x27;");
|
||||
break;
|
||||
case '/':
|
||||
result.append("&#x2F;");
|
||||
break;
|
||||
default:
|
||||
if (ch < 128)
|
||||
{
|
||||
result.append(ch);
|
||||
}
|
||||
else
|
||||
{
|
||||
result.append("&#").append((int)ch).append(";");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
|
|
|
@ -85,7 +85,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
String[] preTags = { "[" };
|
||||
String[] postTags = { "]" };
|
||||
assertEquals( "&lt;h1&gt; [a] &lt;/h1&gt;",
|
||||
assertEquals( "&lt;h1&gt; [a] &lt;&#x2F;h1&gt;",
|
||||
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue