mirror of https://github.com/apache/lucene.git
LUCENE-5191: Fix Unicode corruption in HTML escaping of Standard Highlighter and Fast Vector Highlighter.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1518839 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6f0040797e
commit
dc0dca5172
|
@ -152,6 +152,11 @@ Bug Fixes
|
||||||
* LUCENE-5192: IndexWriter could allow adding same field name with different
|
* LUCENE-5192: IndexWriter could allow adding same field name with different
|
||||||
DocValueTypes under some circumstances. (Shai Erera)
|
DocValueTypes under some circumstances. (Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-5191: SimpleHTMLEncoder in Highlighter module broke Unicode
|
||||||
|
outside BMP because it encoded UTF-16 chars instead of codepoints.
|
||||||
|
The escaping of codepoints > 127 was removed (not needed for valid HTML)
|
||||||
|
and missing escaping for ' and / was added. (Uwe Schindler)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
||||||
|
|
|
@ -47,33 +47,27 @@ public class SimpleHTMLEncoder implements Encoder
|
||||||
{
|
{
|
||||||
char ch = plainText.charAt(index);
|
char ch = plainText.charAt(index);
|
||||||
|
|
||||||
switch (ch)
|
switch (ch) {
|
||||||
{
|
|
||||||
case '"':
|
case '"':
|
||||||
result.append("&quot;");
|
result.append("&quot;");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '&':
|
case '&':
|
||||||
result.append("&amp;");
|
result.append("&amp;");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '<':
|
case '<':
|
||||||
result.append("&lt;");
|
result.append("&lt;");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '>':
|
case '>':
|
||||||
result.append("&gt;");
|
result.append("&gt;");
|
||||||
break;
|
break;
|
||||||
|
case '\'':
|
||||||
|
result.append("&#x27;");
|
||||||
|
break;
|
||||||
|
case '/':
|
||||||
|
result.append("&#x2F;");
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
if (ch < 128)
|
result.append(ch);
|
||||||
{
|
|
||||||
result.append(ch);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result.append("&#").append((int)ch).append(";");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,7 +85,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
||||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||||
String[] preTags = { "[" };
|
String[] preTags = { "[" };
|
||||||
String[] postTags = { "]" };
|
String[] postTags = { "]" };
|
||||||
assertEquals( "&lt;h1&gt; [a] &lt;/h1&gt;",
|
assertEquals( "&lt;h1&gt; [a] &lt;&#x2F;h1&gt;",
|
||||||
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
|
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue