Do not delete previously undefined xmlns attributes when processing Xhtml nodes (#985)

* Add tests

* Add back unseen namespace attributes

* Gentle refactor, document setOriginalNamespacePrefix

* Update validator_test_case_version

Co-authored-by: dotasek <david.otasek@smilecdr.com>
This commit is contained in:
dotasek 2022-11-30 16:01:28 -05:00 committed by GitHub
parent 19e957ef53
commit d53aabe2e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 156 additions and 69 deletions

View File

@ -60,45 +60,84 @@ public class XhtmlParser {
public static final String XHTML_NS = "http://www.w3.org/1999/xhtml"; public static final String XHTML_NS = "http://www.w3.org/1999/xhtml";
private static final char END_OF_CHARS = (char) -1; private static final char END_OF_CHARS = (char) -1;
public class NSMap { public class NamespaceNormalizationMap {
private Map<String, String> nslist = new HashMap<String, String>();
public NSMap(NSMap nsm) { private String defaultNamespace;
if (nsm != null)
nslist.putAll(nsm.nslist);
}
public void def(String ns) { private String originalNamespacePrefix;
nslist.put("", ns); private Map<String, String> map = new HashMap<String, String>();
}
public void ns(String abbrev, String ns) { public NamespaceNormalizationMap(NamespaceNormalizationMap namespaceMap) {
nslist.put(abbrev, ns); if (namespaceMap != null) {
} map.putAll(namespaceMap.map);
this.defaultNamespace = namespaceMap.defaultNamespace;
public String def() { this.originalNamespacePrefix = namespaceMap.originalNamespacePrefix;
return nslist.get("");
}
public boolean hasDef() {
return nslist.containsKey("");
}
public String get(String abbrev) {
return nslist.containsKey(abbrev) ? nslist.get(abbrev) : "http://error/undefined-namespace";
} }
} }
public class QName { public void setDefaultNamespace(String defaultNamespace) {
private String ns; this.defaultNamespace = defaultNamespace;
}
/**
 * Records the prefix this element used for its namespace before it was normalized.
 * <p>
 * Child elements that still use that prefix can then be recognized as belonging
 * to the default namespace. For example, given:
 * <pre>{@code
 * <namespaceA:parentElement xmlns:namespaceA="http://www.somewhere.org/namespaceA">
 *   <namespaceA:childElement/>
 * </namespaceA:parentElement>
 * }</pre>
 * parentElement's namespaceA becomes the default namespace. When childElement is
 * normalized, originalNamespacePrefix is "namespaceA", so childElement is known to
 * use the default namespace, giving:
 * <pre>{@code
 * <parentElement xmlns="http://www.somewhere.org/namespaceA">
 *   <childElement/>
 * </parentElement>
 * }</pre>
 *
 * @param originalNamespacePrefix the prefix that {@link #getNamespaceForPrefix(String)}
 *                                resolves to the default namespace
 */
public void setOriginalNamespacePrefix(String originalNamespacePrefix) {
  this.originalNamespacePrefix = originalNamespacePrefix;
}
/**
 * Registers a prefix-to-namespace mapping, replacing any mapping already stored for that prefix.
 *
 * @param prefix    the namespace prefix (the part after {@code xmlns:})
 * @param namespace the namespace the prefix stands for
 */
public void putNamespacePrefix(String prefix, String namespace) {
  map.put(prefix, namespace);
}
/**
 * @return the current default namespace, or {@code null} when none has been set
 */
public String getDefaultNamespace() {
  return defaultNamespace;
}
/**
 * @return {@code true} when a default namespace has been set (i.e. it is not {@code null})
 */
public boolean hasDefaultNamespace() {
  return defaultNamespace != null;
}
/**
 * Resolves a prefix to its namespace.
 * <p>
 * The recorded original namespace prefix, if any, resolves to the default namespace.
 * Any other prefix is looked up in the prefix map; a prefix with no mapping yields
 * the placeholder namespace {@code http://error/undefined-namespace}.
 *
 * @param prefix the namespace prefix to resolve
 * @return the namespace for the prefix, or the undefined-namespace placeholder
 */
public String getNamespaceForPrefix(String prefix) {
  boolean isOriginalPrefix = originalNamespacePrefix != null
      && originalNamespacePrefix.equals(prefix);
  if (isOriginalPrefix) {
    return defaultNamespace;
  }
  if (!map.containsKey(prefix)) {
    return "http://error/undefined-namespace";
  }
  return map.get(prefix);
}
/**
 * @return the entry set of the prefix-to-namespace map (the map's own {@code entrySet()}, not a copy)
 */
public Set<Map.Entry<String, String>> getPrefixNamespaceEntrySet() {
  return map.entrySet();
}
}
public class ElementName {
private String namespace;
private String name; private String name;
public QName(String src) { public ElementName(String src) {
if (src.contains(":")) { if (src.contains(":")) {
ns = src.substring(0, src.indexOf(":")); namespace = src.substring(0, src.indexOf(":"));
name = src.substring(src.indexOf(":")+1); name = src.substring(src.indexOf(":")+1);
} else { } else {
ns = null; namespace = null;
name = src; name = src;
} }
} }
@ -107,17 +146,17 @@ public class XhtmlParser {
return name; return name;
} }
public boolean hasNs() { public boolean hasNamespace() {
return ns != null; return namespace != null;
} }
public String getNs() { public String getNamespace() {
return ns; return namespace;
} }
@Override @Override
public String toString() { public String toString() {
return ns+"::"+name; return namespace +"::"+name;
} }
} }
@ -455,14 +494,14 @@ public class XhtmlParser {
throw new FHIRFormatError("Unable to Parse HTML - does not start with tag. Found "+peekChar()+descLoc()); throw new FHIRFormatError("Unable to Parse HTML - does not start with tag. Found "+peekChar()+descLoc());
readChar(); readChar();
markLocation(); markLocation();
QName n = new QName(readName().toLowerCase()); ElementName n = new ElementName(readName().toLowerCase());
if ((entryName != null) && !n.getName().equals(entryName)) if ((entryName != null) && !n.getName().equals(entryName))
throw new FHIRFormatError("Unable to Parse HTML - starts with '"+n+"' not '"+entryName+"'"+descLoc()); throw new FHIRFormatError("Unable to Parse HTML - starts with '"+n+"' not '"+entryName+"'"+descLoc());
XhtmlNode root = result.addTag(n.getName()); XhtmlNode root = result.addTag(n.getName());
root.setLocation(markLocation()); root.setLocation(markLocation());
parseAttributes(root); parseAttributes(root);
markLocation(); markLocation();
NSMap nsm = checkNamespaces(n, root, null, true); NamespaceNormalizationMap nsm = normalizeNamespaces(n, root, null, true);
if (readChar() == '/') { if (readChar() == '/') {
if (peekChar() != '>') if (peekChar() != '>')
throw new FHIRFormatError("unexpected non-end of element "+n+" "+descLoc()); throw new FHIRFormatError("unexpected non-end of element "+n+" "+descLoc());
@ -481,34 +520,59 @@ public class XhtmlParser {
return res; return res;
} }
private NSMap checkNamespaces(QName n, XhtmlNode node, NSMap nsm, boolean root) { private NamespaceNormalizationMap normalizeNamespaces(ElementName elementName, XhtmlNode node, NamespaceNormalizationMap parentNamespaceMap, boolean nodeIsRoot) {
// what we do here is strip out any stated namespace attributes, putting them in the namesapce map // what we do here is strip out any stated namespace attributes, putting them in the namespace map
// then we figure out what the namespace of this element is, and state it explicitly if it's not the default // then we figure out what the namespace of this element is, and state it explicitly if it's not the default
NSMap result = new NSMap(nsm); NamespaceNormalizationMap nodeNamespaceMap = new NamespaceNormalizationMap(parentNamespaceMap);
List<String> nsattrs = new ArrayList<String>(); List<String> namespaceAttributes = new ArrayList<String>();
for (String an : node.getAttributes().keySet()) { for (String an : node.getAttributes().keySet()) {
if (an.equals("xmlns")) { if (an.equals("xmlns")) {
result.def(node.getAttribute(an)); nodeNamespaceMap.setDefaultNamespace(node.getAttribute(an));
nsattrs.add(an); namespaceAttributes.add(an);
} }
if (an.startsWith("xmlns:")) { if (an.startsWith("xmlns:")) {
result.ns(an.substring(6), node.getAttribute(an)); nodeNamespaceMap.putNamespacePrefix(an.substring(6), node.getAttribute(an));
nsattrs.add(an); namespaceAttributes.add(an);
} }
} }
for (String s : nsattrs)
for (String s : namespaceAttributes)
node.getAttributes().remove(s); node.getAttributes().remove(s);
if (n.hasNs()) { if (elementName.hasNamespace()) {
String nns = result.get(n.getNs()); String elementNamespace = nodeNamespaceMap.getNamespaceForPrefix(elementName.getNamespace());
if (!nns.equals(result.def())) { if (!elementNamespace.equals(nodeNamespaceMap.getDefaultNamespace())) {
node.getAttributes().put("xmlns", nns); node.getAttributes().put("xmlns", elementNamespace);
result.def(nns); nodeNamespaceMap.setDefaultNamespace(elementNamespace);
nodeNamespaceMap.setOriginalNamespacePrefix(elementName.getNamespace());
nodeNamespaceMap.map.remove(elementName.getNamespace());
} }
} else if (root && result.hasDef()) {
node.getAttributes().put("xmlns", result.def());
} }
return result; // Add namespaces back if not defined in parentNamespaceMap (we haven't seen it before, so we need to define it here)
if (shouldAddXmlnsNamespaceAttribute(parentNamespaceMap, nodeIsRoot, nodeNamespaceMap)) {
node.getAttributes().put("xmlns", nodeNamespaceMap.getDefaultNamespace());
}
for (Map.Entry<String, String> entry : nodeNamespaceMap.getPrefixNamespaceEntrySet() ) {
if (shouldAddXmlnsNamespacePrefixAttribute(parentNamespaceMap, nodeIsRoot, entry.getKey())) {
node.getAttributes().put("xmlns:" + entry.getKey(), entry.getValue());
}
}
return nodeNamespaceMap;
}
/**
 * Decides whether an {@code xmlns:<prefix>} attribute has to be written on the current node.
 * <p>
 * The attribute is needed on the root node, and on any other node whose parent
 * namespace map does not already contain the prefix. A missing parent map is treated
 * as "prefix not declared yet", matching the null handling in
 * {@code shouldAddXmlnsNamespaceAttribute}.
 *
 * @param parentNamespaceMap the namespace map of the parent node; may be {@code null}
 * @param nodeIsRoot         whether the current node is the root node
 * @param attributeKey       the namespace prefix being considered
 * @return {@code true} if the prefix declaration should be added to the node
 */
private static boolean shouldAddXmlnsNamespacePrefixAttribute(NamespaceNormalizationMap parentNamespaceMap, boolean nodeIsRoot, String attributeKey) {
  // With no parent map, no ancestor can have declared the prefix, so it must be declared here.
  if (nodeIsRoot || parentNamespaceMap == null) {
    return true;
  }
  return !parentNamespaceMap.map.containsKey(attributeKey);
}
private static boolean shouldAddXmlnsNamespaceAttribute(NamespaceNormalizationMap parentNamespaceMap, boolean nodeIsRoot, NamespaceNormalizationMap nodeNamespaceMap) {
if (nodeIsRoot) {
return nodeNamespaceMap.hasDefaultNamespace();
}
return nodeNamespaceMap.hasDefaultNamespace() && (parentNamespaceMap == null || !nodeNamespaceMap.getDefaultNamespace().equals(parentNamespaceMap.getDefaultNamespace()));
} }
private void addTextNode(XhtmlNode node, StringBuilder s) private void addTextNode(XhtmlNode node, StringBuilder s)
@ -522,7 +586,7 @@ public class XhtmlParser {
s.setLength(0); s.setLength(0);
} }
} }
private void parseElementInner(XhtmlNode node, List<XhtmlNode> parents, NSMap nsm, boolean escaping) throws FHIRFormatError, IOException private void parseElementInner(XhtmlNode node, List<XhtmlNode> parents, NamespaceNormalizationMap nsm, boolean escaping) throws FHIRFormatError, IOException
{ {
StringBuilder s = new StringBuilder(); StringBuilder s = new StringBuilder();
while (peekChar() != END_OF_CHARS && !parents.contains(unwindPoint) && !(node == unwindPoint)) while (peekChar() != END_OF_CHARS && !parents.contains(unwindPoint) && !(node == unwindPoint))
@ -541,7 +605,7 @@ public class XhtmlParser {
node.addComment(readToTagEnd()).setLocation(markLocation()); node.addComment(readToTagEnd()).setLocation(markLocation());
else if (peekChar() == '/') { else if (peekChar() == '/') {
readChar(); readChar();
QName n = new QName(readToTagEnd()); ElementName n = new ElementName(readToTagEnd());
if (node.getName().equals(n.getName())) if (node.getName().equals(n.getName()))
return; return;
else else
@ -591,10 +655,10 @@ public class XhtmlParser {
addTextNode(node, s); addTextNode(node, s);
} }
private void parseElement(XhtmlNode parent, List<XhtmlNode> parents, NSMap nsm) throws IOException, FHIRFormatError private void parseElement(XhtmlNode parent, List<XhtmlNode> parents, NamespaceNormalizationMap namespaceMap) throws IOException, FHIRFormatError
{ {
markLocation(); markLocation();
QName name = new QName(readName()); ElementName name = new ElementName(readName());
XhtmlNode node = parent.addTag(name.getName()); XhtmlNode node = parent.addTag(name.getName());
node.setLocation(markLocation()); node.setLocation(markLocation());
List<XhtmlNode> newParents = new ArrayList<XhtmlNode>(); List<XhtmlNode> newParents = new ArrayList<XhtmlNode>();
@ -602,13 +666,13 @@ public class XhtmlParser {
newParents.add(parent); newParents.add(parent);
parseAttributes(node); parseAttributes(node);
markLocation(); markLocation();
nsm = checkNamespaces(name, node, nsm, false); namespaceMap = normalizeNamespaces(name, node, namespaceMap, false);
if (readChar() == '/') { if (readChar() == '/') {
if (peekChar() != '>') if (peekChar() != '>')
throw new FHIRFormatError("unexpected non-end of element "+name+" "+descLoc()); throw new FHIRFormatError("unexpected non-end of element "+name+" "+descLoc());
readChar(); readChar();
} else { } else {
parseElementInner(node, newParents, nsm, "script".equals(name.getName())); parseElementInner(node, newParents, namespaceMap, "script".equals(name.getName()));
} }
} }

View File

@ -110,8 +110,31 @@ public class XhtmlNodeTest {
@Test @Test
public void testParseEntities() throws FHIRFormatError, IOException { public void testParseEntities() throws FHIRFormatError, IOException {
XhtmlNode x = new XhtmlParser().parse(BaseTestingUtilities.loadTestResource("xhtml", "entities.html"), "div"); XhtmlNode x = new XhtmlParser().parse(BaseTestingUtilities.loadTestResource("xhtml", "entities.html"), "div");
} }
/**
 * Parses svg.html with "svg" as the entry element and checks that the second child
 * node of the result carries both the xmlns and the xmlns:xlink attributes.
 */
@Test
public void testParseSvg() throws FHIRFormatError, IOException {
  XhtmlNode parsed = new XhtmlParser().parse(BaseTestingUtilities.loadTestResource("xhtml", "svg.html"), "svg");
  XhtmlNode svgNode = parsed.getChildNodes().get(1);

  Assertions.assertEquals("http://www.w3.org/2000/svg", svgNode.getAttributes().get("xmlns"));
  Assertions.assertEquals("http://www.w3.org/1999/xlink", svgNode.getAttributes().get("xmlns:xlink"));
}
/**
 * Parses non-root-svg.html with "div" as the entry element and checks that the node at
 * child index 1 of the first child carries both the xmlns and the xmlns:xlink attributes.
 */
@Test
public void testParseSvgNotRoot() throws FHIRFormatError, IOException {
  XhtmlNode parsed = new XhtmlParser().parse(BaseTestingUtilities.loadTestResource("xhtml", "non-root-svg.html"), "div");
  XhtmlNode svgNode = parsed.getChildNodes().get(0).getChildNodes().get(1);

  Assertions.assertEquals("http://www.w3.org/2000/svg", svgNode.getAttributes().get("xmlns"));
  Assertions.assertEquals("http://www.w3.org/1999/xlink", svgNode.getAttributes().get("xmlns:xlink"));
}
/**
 * Parses namespaced-non-root-svg.html with "div" as the entry element and checks that
 * the node at child index 1 of the first child carries both the xmlns and the
 * xmlns:xlink attributes.
 */
@Test
public void testParseNamespacedSvgNotRoot() throws FHIRFormatError, IOException {
  XhtmlNode parsed = new XhtmlParser().parse(BaseTestingUtilities.loadTestResource("xhtml", "namespaced-non-root-svg.html"), "div");
  XhtmlNode svgNode = parsed.getChildNodes().get(0).getChildNodes().get(1);

  Assertions.assertEquals("http://www.w3.org/2000/svg", svgNode.getAttributes().get("xmlns"));
  Assertions.assertEquals("http://www.w3.org/1999/xlink", svgNode.getAttributes().get("xmlns:xlink"));
}
} }

View File

@ -19,7 +19,7 @@
<properties> <properties>
<hapi_fhir_version>5.4.0</hapi_fhir_version> <hapi_fhir_version>5.4.0</hapi_fhir_version>
<validator_test_case_version>1.1.127</validator_test_case_version> <validator_test_case_version>1.1.128-SNAPSHOT</validator_test_case_version>
<junit_jupiter_version>5.7.1</junit_jupiter_version> <junit_jupiter_version>5.7.1</junit_jupiter_version>
<junit_platform_launcher_version>1.8.2</junit_platform_launcher_version> <junit_platform_launcher_version>1.8.2</junit_platform_launcher_version>
<maven_surefire_version>3.0.0-M5</maven_surefire_version> <maven_surefire_version>3.0.0-M5</maven_surefire_version>