Fix bug parsing <script> in XHTML: handle '<' characters inside script content

This commit is contained in:
Grahame Grieve 2025-01-30 00:53:09 +11:00
parent ac4f050bac
commit 7aae2cb525
3 changed files with 65 additions and 11 deletions

View File

@ -290,9 +290,12 @@ public class XhtmlComposer {
if (node.getName() == "head" && node.getElement("meta") == null)
dst.append(indent + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>" + (pretty && !noPrettyOverride ? "\r\n" : ""));
if (act && "script".equals(node.getName())) {
dst.append(node.allText());
} else {
for (XhtmlNode c : node.getChildNodes())
writeNode(indent + " ", c, noPrettyOverride || node.isNoPretty());
}
if (act)
dst.append("</" + node.getName() + ">" + (pretty && !noPrettyOverride ? "\r\n" : ""));
else if (node.getChildNodes().get(node.getChildNodes().size() - 1).getNodeType() == NodeType.Text)

View File

@ -513,7 +513,7 @@ public class XhtmlParser {
} else {
unwindPoint = null;
List<XhtmlNode> p = new ArrayList<>();
parseElementInner(root, p, nsm, true);
parseElementInner(root, p, nsm);
root.setEmptyExpanded(true);
}
return result;
@ -580,17 +580,16 @@ public class XhtmlParser {
return nodeNamespaceMap.hasDefaultNamespace() && (parentNamespaceMap == null || !nodeNamespaceMap.getDefaultNamespace().equals(parentNamespaceMap.getDefaultNamespace()));
}
private void addTextNode(XhtmlNode node, StringBuilder s)
{
private void addTextNode(XhtmlNode node, StringBuilder s) {
String t = isTrimWhitespace() ? s.toString().trim() : s.toString();
if (t.length() > 0)
{
if (t.length() > 0) {
lastText = t;
node.addText(t).setLocation(markLocation());
s.setLength(0);
}
}
private void parseElementInner(XhtmlNode node, List<XhtmlNode> parents, NamespaceNormalizationMap nsm, boolean escaping) throws FHIRFormatError, IOException
private void parseElementInner(XhtmlNode node, List<XhtmlNode> parents, NamespaceNormalizationMap nsm) throws FHIRFormatError, IOException
{
StringBuilder s = new StringBuilder();
while (peekChar() != END_OF_CHARS && !parents.contains(unwindPoint) && !(node == unwindPoint))
@ -659,6 +658,23 @@ public class XhtmlParser {
addTextNode(node, s);
}
/**
 * Reads the raw content of a {@code <script>} element into a single text node.
 *
 * <p>Characters are consumed verbatim (no element or entity parsing) until the
 * literal, case-sensitive terminator {@code </script>} has been read or the end
 * of the input is reached. The terminator itself is consumed but is not part of
 * the stored text.
 *
 * <p>If the input ends before the terminator is seen, everything that was read is
 * kept as the script text; nothing is stripped from the end.
 *
 * @param node the script element that receives the text child (only added when
 *             the resulting text is non-empty)
 * @throws FHIRFormatError declared for the underlying character reader
 * @throws IOException declared for the underlying character reader
 */
private void parseScriptInner(XhtmlNode node) throws FHIRFormatError, IOException {
  final String endTag = "</script>";
  StringBuilder s = new StringBuilder();
  boolean closed = false;
  while (peekChar() != END_OF_CHARS && !closed) {
    s.append(readChar());
    // Only inspect the tail of the buffer; converting the whole buffer to a
    // String on every character would make this loop quadratic.
    int tailStart = s.length() - endTag.length();
    closed = tailStart >= 0 && s.indexOf(endTag, tailStart) == tailStart;
  }
  if (closed) {
    // Drop the terminator only when it was really matched; on premature end of
    // input the trailing characters are script content and must be preserved.
    s.setLength(s.length() - endTag.length());
  }
  String ss = s.toString();
  String t = isTrimWhitespace() ? ss.trim() : ss;
  if (t.length() > 0) {
    lastText = t;
    node.addText(t).setLocation(markLocation());
  }
}
private void parseElement(XhtmlNode parent, List<XhtmlNode> parents, NamespaceNormalizationMap namespaceMap) throws IOException, FHIRFormatError
{
markLocation();
@ -676,9 +692,11 @@ public class XhtmlParser {
throw new FHIRFormatError("unexpected non-end of element "+name+" "+descLoc());
readChar();
node.setEmptyExpanded(false);
} else if ("script".equals(name.getName())) {
parseScriptInner(node);
} else {
node.setEmptyExpanded(true);
parseElementInner(node, newParents, namespaceMap, "script".equals(name.getName()));
parseElementInner(node, newParents, namespaceMap);
}
}
@ -1341,7 +1359,7 @@ public class XhtmlParser {
result.setName(n);
unwindPoint = null;
List<XhtmlNode> p = new ArrayList<>();
parseElementInner(result, p, null, true);
parseElementInner(result, p, null);
return result;
}

View File

@ -1,11 +1,38 @@
package org.hl7.fhir.utilities.xhtml;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import org.hl7.fhir.exceptions.FHIRFormatError;
public class XhtmlTests {
private static final String SOURCE_SCRIPT =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"+
"<!DOCTYPE HTML>\r\n"+
"<html xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\r\n"+
" <head>\r\n"+
" <title>This</title>\r\n"+
" <script type=\"text/javascript\" src=\"fhir-table-scripts.js\"> </script>\r\n"+
" </head>\r\n"+
" <body onload=\"document.body.style.opacity='1'\">\r\n"+
" <script src=\"assets/js/prism.js\"></script>\r\n"+
"<script>\r\n"+
" var statements = document.getElementById(\"statements\");\r\n"+
" var requirements = statements.getElementsByClassName(\"requirement\");\r\n"+
" for(var req of requirements) {\r\n"+
" req.innerHTML = req.innerHTML.replace(/\\[\\[([^\\]]+)\\]\\]/g, '<a href=\"Requirements-EHRSFMR2.1-\\$1.html\">\\$1</a>')\r\n"+
" }\r\n"+
" var description = document.getElementById(\"description\");\r\n"+
" description.innerHTML = description.innerHTML.replace(/&lt;/g,'<').replace(/&gt;/g,'>');\r\n"+
"</script>\r\n"+
" </body>\r\n"+
"</html>\r\n";
@Test
public void testToStringOnNullType()
{
@ -13,4 +40,10 @@ public class XhtmlTests {
String actual = node.toString();
assertTrue(actual.startsWith("org.hl7.fhir.utilities.xhtml.XhtmlNode@"), "toString() should return java the toString default method for objects, which starts with the full class name");
}
/**
 * Parses a document whose {@code <script>} body contains raw '<' and '>'
 * characters and checks that the parser returns a result instead of failing.
 */
@Test
public void testParseScript() throws FHIRFormatError, IOException {
  XhtmlNode x = new XhtmlParser().setMustBeWellFormed(true).parse(SOURCE_SCRIPT, "html");
  // assertNotNull states the intent directly and gives a clearer failure report
  // than assertTrue(x != null).
  Assertions.assertNotNull(x, "parsing a document containing <script> content should return a node");
}
}