Merge pull request #1532 from adrianclay/handle-multi-character-html-entities

Add support to XhtmlParser for numeric HTML entities that decode to two UTF-16 characters (code points above U+FFFF)
This commit is contained in:
Grahame Grieve 2024-01-10 09:06:02 +11:00 committed by GitHub
commit 096033c002
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 2 deletions

View File

@ -970,9 +970,9 @@ public class XhtmlParser {
throw new FHIRFormatError("Invalid literal declaration following text: " + s);
else if (c.charAt(0) == '#') {
if (isInteger(c.substring(1), 10))
s.append((char) Integer.parseInt(c.substring(1)));
s.append(Character.toString(Integer.parseInt(c.substring(1))));
else if (c.charAt(1) == 'x' && isInteger(c.substring(2), 16))
s.append((char) Integer.parseInt(c.substring(2), 16));
s.append(Character.toString(Integer.parseInt(c.substring(2), 16)));
} else if (declaredEntities.containsKey(c)) {
s.append(declaredEntities.get(c));
} else {

View File

@ -207,5 +207,17 @@ public class XhtmlNodeTest {
Assertions.assertEquals("<div><p>This <b>is</b> a <b>long</b> paragraph</p></div>", new XhtmlComposer(true, false).compose(x));
}
/**
 * Parses a div whose only content is the decimal character reference 128567
 * (code point U+1F637, which lies above U+FFFF) and checks that the content of
 * the first child node is the surrogate pair U+D83D U+DE37.
 */
@Test
public void testEntityNumberGreaterThanFFFF_Decimal() throws IOException {
  final String expectedSurrogatePair = "\uD83D\uDE37";
  final XhtmlParser parser = new XhtmlParser();

  XhtmlNode parsedDiv = parser.parse("<div>&#128567;</div>", "div");

  Assertions.assertEquals(
      expectedSurrogatePair,
      parsedDiv.getFirstElement().getChildNodes().get(0).getContent());
}
/**
 * Parses a div whose only content is the hexadecimal character reference
 * x1F637 (a code point above U+FFFF) and checks that the content of the first
 * child node is the surrogate pair U+D83D U+DE37.
 */
@Test
public void testEntityNumberGreaterThanFFFF_Hex() throws IOException {
  final String expectedSurrogatePair = "\uD83D\uDE37";
  final XhtmlParser parser = new XhtmlParser();

  XhtmlNode parsedDiv = parser.parse("<div>&#x1F637;</div>", "div");

  Assertions.assertEquals(
      expectedSurrogatePair,
      parsedDiv.getFirstElement().getChildNodes().get(0).getContent());
}
}