Add support to XhtmlParser for HTML entities that need two UTF-16 characters

Previously, the parser would mangle any numeric HTML entity whose value is
greater than 0xFFFF, because casting the parsed value to char truncated it.
This commit is contained in:
Adrian Clay 2024-01-04 16:29:05 +00:00
parent 5e9f87da7d
commit b8e8e771f0
2 changed files with 14 additions and 2 deletions

View File

@ -970,9 +970,9 @@ public class XhtmlParser {
throw new FHIRFormatError("Invalid literal declaration following text: " + s);
else if (c.charAt(0) == '#') {
if (isInteger(c.substring(1), 10))
s.append((char) Integer.parseInt(c.substring(1)));
s.append(Character.toString(Integer.parseInt(c.substring(1))));
else if (c.charAt(1) == 'x' && isInteger(c.substring(2), 16))
s.append((char) Integer.parseInt(c.substring(2), 16));
s.append(Character.toString(Integer.parseInt(c.substring(2), 16)));
} else if (declaredEntities.containsKey(c)) {
s.append(declaredEntities.get(c));
} else {

View File

@ -207,5 +207,17 @@ public class XhtmlNodeTest {
Assertions.assertEquals("<div><p>This <b>is</b> a <b>long</b> paragraph</p></div>", new XhtmlComposer(true, false).compose(x));
}
/**
 * Parses a div whose only content is the decimal numeric entity 128567 (code point U+1F637)
 * and checks that the first child node's content is the corresponding UTF-16 surrogate pair.
 */
@Test
public void testEntityNumberGreaterThanFFFF_Decimal() throws IOException {
final String expectedSurrogatePair = "\uD83D\uDE37";
XhtmlNode parsedDiv = new XhtmlParser().parse("<div>&#128567;</div>", "div");
String actualContent = parsedDiv.getFirstElement().getChildNodes().get(0).getContent();
Assertions.assertEquals(expectedSurrogatePair, actualContent);
}
/**
 * Parses a div whose only content is the hexadecimal numeric entity x1F637 (code point U+1F637)
 * and checks that the first child node's content is the corresponding UTF-16 surrogate pair.
 */
@Test
public void testEntityNumberGreaterThanFFFF_Hex() throws IOException {
final String expectedSurrogatePair = "\uD83D\uDE37";
XhtmlNode parsedDiv = new XhtmlParser().parse("<div>&#x1F637;</div>", "div");
String actualContent = parsedDiv.getFirstElement().getChildNodes().get(0).getContent();
Assertions.assertEquals(expectedSurrogatePair, actualContent);
}
}