LUCENE-5191: Fix Unicode corruption in HTML escaping of Standard Highlighter and Fast Vector Highlighter.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1518839 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2013-08-29 22:01:46 +00:00
parent 6f0040797e
commit dc0dca5172
3 changed files with 14 additions and 15 deletions

View File

@ -152,6 +152,11 @@ Bug Fixes
* LUCENE-5192: IndexWriter could allow adding same field name with different * LUCENE-5192: IndexWriter could allow adding same field name with different
DocValueTypes under some circumstances. (Shai Erera) DocValueTypes under some circumstances. (Shai Erera)
* LUCENE-5191: SimpleHTMLEncoder in Highlighter module broke Unicode
outside BMP because it encoded UTF-16 chars instead of codepoints.
The escaping of codepoints > 127 was removed (not needed for valid HTML)
and missing escaping for ' and / was added. (Uwe Schindler)
API Changes API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap. * LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.

View File

@ -47,33 +47,27 @@ public class SimpleHTMLEncoder implements Encoder
{ {
char ch = plainText.charAt(index); char ch = plainText.charAt(index);
switch (ch) switch (ch) {
{
case '"': case '"':
result.append("&quot;"); result.append("&quot;");
break; break;
case '&': case '&':
result.append("&amp;"); result.append("&amp;");
break; break;
case '<': case '<':
result.append("&lt;"); result.append("&lt;");
break; break;
case '>': case '>':
result.append("&gt;"); result.append("&gt;");
break; break;
case '\'':
result.append("&#x27;");
break;
case '/':
result.append("&#x2F;");
break;
default: default:
if (ch < 128) result.append(ch);
{
result.append(ch);
}
else
{
result.append("&#").append((int)ch).append(";");
}
} }
} }

View File

@ -85,7 +85,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
String[] preTags = { "[" }; String[] preTags = { "[" };
String[] postTags = { "]" }; String[] postTags = { "]" };
assertEquals( "&lt;h1&gt; [a] &lt;/h1&gt;", assertEquals( "&lt;h1&gt; [a] &lt;&#x2F;h1&gt;",
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) ); sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
} }