diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d8df7fcae7e..84f0e053bbf 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -152,6 +152,11 @@ Bug Fixes * LUCENE-5192: IndexWriter could allow adding same field name with different DocValueTypes under some circumstances. (Shai Erera) +* LUCENE-5191: SimpleHTMLEncoder in Highlighter module broke Unicode + outside BMP because it encoded UTF-16 chars instead of codepoints. + The escaping of codepoints > 127 was removed (not needed for valid HTML) + and missing escaping for ' and / was added. (Uwe Schindler) + API Changes * LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap. diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java index 3eda6081c8d..003645d5bf1 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java @@ -47,33 +47,27 @@ public class SimpleHTMLEncoder implements Encoder { char ch = plainText.charAt(index); - switch (ch) - { + switch (ch) { case '"': result.append("""); break; - case '&': result.append("&"); break; - case '<': result.append("<"); break; - case '>': result.append(">"); break; - + case '\'': + result.append("'"); + break; + case '/': + result.append("/"); + break; default: - if (ch < 128) - { - result.append(ch); - } - else - { - result.append("&#").append((int)ch).append(";"); - } + result.append(ch); } } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java index 012e1bd9f5b..3dbd12b6613 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java @@ -85,7 +85,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); String[] preTags = { "[" }; String[] postTags = { "]" }; - assertEquals( "<h1> [a] </h1>", + assertEquals( "<h1> [a] </h1>", sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) ); }