Round trip XHTML faithfully wrt empty elements

This commit is contained in:
Grahame Grieve 2022-12-05 13:41:31 +11:00
parent cacf27428e
commit a0c28f3326
5 changed files with 92 additions and 36 deletions

View File

@ -2,12 +2,15 @@ package org.hl7.fhir.r5.test.utils;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.r5.context.IWorkerContext.IContextResourceLoader;
import org.hl7.fhir.r5.formats.JsonParser;
import org.hl7.fhir.r5.formats.XmlParser;
import org.hl7.fhir.r5.model.Bundle;
import org.hl7.fhir.r5.model.CodeSystem;
import org.hl7.fhir.r5.model.Resource;
import org.hl7.fhir.utilities.npm.NpmPackage;
@ -43,4 +46,10 @@ public class TestPackageLoader implements IContextResourceLoader {
/**
 * Supplies the loader to use for the given package.
 *
 * @param npm the package a loader is requested for; this implementation does not read it
 * @return this same loader instance
 */
public IContextResourceLoader getNewLoader(NpmPackage npm) {
  // No per-package state is created here, so the current instance is handed back.
  return this;
}
/**
 * Reports the code systems held by this loader.
 *
 * @return a newly allocated, empty, mutable list on every call
 */
@Override
public List<CodeSystem> getCodeSystems() {
  List<CodeSystem> none = new ArrayList<>();
  return none;
}
}

View File

@ -90,40 +90,42 @@ public class XhtmlComposer {
/**
 * Writes every top-level child of the document, in order, through {@code writeNode}.
 *
 * Each child is written with a two-space indent and with the pretty-print
 * override switched off.
 *
 * @param doc the document whose child nodes are written
 * @throws IOException if writing a child node fails
 */
private void composeDoc(XhtmlDocument doc) throws IOException {
  // headers....
  // dst.append("<html>" + (pretty ? "\r\n" : ""));
  // A single loop: one pass over the children, each written exactly once.
  for (XhtmlNode c : doc.getChildNodes()) {
    writeNode("  ", c, false);
  }
  // dst.append("</html>" + (pretty ? "\r\n" : ""));
}
private void writeNode(String indent, XhtmlNode node, boolean noPrettyOverride) throws IOException {
if (node.getNodeType() == NodeType.Comment)
if (node.getNodeType() == NodeType.Comment) {
writeComment(indent, node, noPrettyOverride);
else if (node.getNodeType() == NodeType.DocType)
} else if (node.getNodeType() == NodeType.DocType) {
writeDocType(node);
else if (node.getNodeType() == NodeType.Instruction)
} else if (node.getNodeType() == NodeType.Instruction) {
writeInstruction(node);
else if (node.getNodeType() == NodeType.Element)
} else if (node.getNodeType() == NodeType.Element) {
writeElement(indent, node, noPrettyOverride);
else if (node.getNodeType() == NodeType.Document)
} else if (node.getNodeType() == NodeType.Document) {
writeDocument(indent, node);
else if (node.getNodeType() == NodeType.Text)
} else if (node.getNodeType() == NodeType.Text) {
writeText(node);
else if (node.getNodeType() == null)
} else if (node.getNodeType() == null) {
throw new IOException("Null node type");
else
} else {
throw new IOException("Unknown node type: "+node.getNodeType().toString());
}
}
private void writeText(XhtmlNode node) throws IOException {
for (char c : node.getContent().toCharArray())
{
if (c == '&')
if (c == '&') {
dst.append("&amp;");
else if (c == '<')
} else if (c == '<') {
dst.append("&lt;");
else if (c == '>')
} else if (c == '>') {
dst.append("&gt;");
else if (xml) {
} else if (xml) {
if (c == '"')
dst.append("&quot;");
else
@ -189,26 +191,34 @@ public class XhtmlComposer {
indent = "";
// html self closing tags: http://xahlee.info/js/html5_non-closing_tag.html
if (node.getChildNodes().size() == 0 && (xml || Utilities.existsInList(node.getName(), "area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "menuitem", "meta", "param", "source", "track", "wbr")))
boolean concise = node.getChildNodes().size() == 0;
if (node.hasEmptyExpanded() && node.getEmptyExpanded()) {
concise = false;
}
if (!xml && Utilities.existsInList(node.getName(), "area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "menuitem", "meta", "param", "source", "track", "wbr")) {
concise = false;
}
if (concise)
dst.append(indent + "<" + node.getName() + attributes(node) + "/>" + (pretty && !noPrettyOverride ? "\r\n" : ""));
else {
boolean act = node.allChildrenAreText();
if (act || !pretty || noPrettyOverride)
dst.append(indent + "<" + node.getName() + attributes(node)+">");
else
dst.append(indent + "<" + node.getName() + attributes(node) + ">\r\n");
if (node.getName() == "head" && node.getElement("meta") == null)
dst.append(indent + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>" + (pretty && !noPrettyOverride ? "\r\n" : ""));
boolean act = node.allChildrenAreText();
if (act || !pretty || noPrettyOverride)
dst.append(indent + "<" + node.getName() + attributes(node)+">");
else
dst.append(indent + "<" + node.getName() + attributes(node) + ">\r\n");
if (node.getName() == "head" && node.getElement("meta") == null)
dst.append(indent + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>" + (pretty && !noPrettyOverride ? "\r\n" : ""));
for (XhtmlNode c : node.getChildNodes())
writeNode(indent + " ", c, noPrettyOverride || node.isNoPretty());
if (act)
dst.append("</" + node.getName() + ">" + (pretty && !noPrettyOverride ? "\r\n" : ""));
else if (node.getChildNodes().get(node.getChildNodes().size() - 1).getNodeType() == NodeType.Text)
dst.append((pretty && !noPrettyOverride ? "\r\n"+ indent : "") + "</" + node.getName() + ">" + (pretty && !noPrettyOverride ? "\r\n" : ""));
else
dst.append(indent + "</" + node.getName() + ">" + (pretty && !noPrettyOverride ? "\r\n" : ""));
for (XhtmlNode c : node.getChildNodes())
writeNode(indent + " ", c, noPrettyOverride || node.isNoPretty());
if (act)
dst.append("</" + node.getName() + ">" + (pretty && !noPrettyOverride ? "\r\n" : ""));
else if (node.getChildNodes().get(node.getChildNodes().size() - 1).getNodeType() == NodeType.Text)
dst.append((pretty && !noPrettyOverride ? "\r\n"+ indent : "") + "</" + node.getName() + ">" + (pretty && !noPrettyOverride ? "\r\n" : ""));
else
dst.append(indent + "</" + node.getName() + ">" + (pretty && !noPrettyOverride ? "\r\n" : ""));
}
}

View File

@ -1,5 +1,7 @@
package org.hl7.fhir.utilities.xhtml;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
/*
Copyright (c) 2011+, HL7, Inc.
All rights reserved.
@ -38,21 +40,15 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.hl7.fhir.exceptions.DefinitionException;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.exceptions.FHIRFormatError;
import org.hl7.fhir.instance.model.api.IBaseXhtml;
import org.hl7.fhir.utilities.MarkDownProcessor;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.MarkDownProcessor.Dialect;
import org.hl7.fhir.utilities.i18n.I18nConstants;
import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
import org.hl7.fhir.utilities.Utilities;
import ca.uhn.fhir.model.api.annotation.ChildOrder;
import ca.uhn.fhir.model.primitive.XhtmlDt;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
@ca.uhn.fhir.model.api.annotation.DatatypeDef(name="xhtml")
public class XhtmlNode implements IBaseXhtml {
private static final long serialVersionUID = -4362547161441436492L;
@ -93,6 +89,7 @@ public class XhtmlNode implements IBaseXhtml {
private boolean inPara;
private boolean inLink;
private boolean seperated;
private Boolean emptyExpanded;
public XhtmlNode() {
super();
@ -416,6 +413,19 @@ public class XhtmlNode implements IBaseXhtml {
}
/**
 * Returns the stored empty-expanded flag.
 *
 * @return the current flag value; the field is a boxed {@code Boolean}, so check
 *         {@link #hasEmptyExpanded()} before unboxing
 */
public Boolean getEmptyExpanded() {
  return this.emptyExpanded;
}
/**
 * Tells whether the empty-expanded flag currently holds a value.
 *
 * @return {@code true} when the flag is non-null, {@code false} when it is {@code null}
 */
public boolean hasEmptyExpanded() {
  return !(this.emptyExpanded == null);
}
/**
 * Stores the empty-expanded flag.
 *
 * @param emptyExpanded the value to store; {@code null} is stored as-is, after which
 *                      {@link #hasEmptyExpanded()} returns {@code false}
 */
public void setEmptyExpanded(Boolean emptyExpanded) {
  this.emptyExpanded = emptyExpanded;
}
@Override
public String getValueAsString() {
if (isEmpty()) {

View File

@ -506,10 +506,12 @@ public class XhtmlParser {
if (peekChar() != '>')
throw new FHIRFormatError("unexpected non-end of element "+n+" "+descLoc());
readChar();
root.setEmptyExpanded(false);
} else {
unwindPoint = null;
List<XhtmlNode> p = new ArrayList<>();
parseElementInner(root, p, nsm, true);
root.setEmptyExpanded(true);
}
return result;
}
@ -671,7 +673,9 @@ public class XhtmlParser {
if (peekChar() != '>')
throw new FHIRFormatError("unexpected non-end of element "+name+" "+descLoc());
readChar();
node.setEmptyExpanded(false);
} else {
node.setEmptyExpanded(true);
parseElementInner(node, newParents, namespaceMap, "script".equals(name.getName()));
}
}

View File

@ -6,6 +6,8 @@ import java.io.ObjectOutputStream;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.exceptions.FHIRFormatError;
import org.hl7.fhir.utilities.TextFile;
import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
import org.hl7.fhir.utilities.xhtml.XhtmlNode;
import org.hl7.fhir.utilities.xhtml.XhtmlParser;
import org.junit.jupiter.api.Assertions;
@ -137,4 +139,25 @@ public class XhtmlNodeTest {
Assertions.assertEquals("http://www.w3.org/1999/xlink", x.getChildNodes().get(0).getChildNodes().get(1).getAttributes().get("xmlns:xlink"));
}
/**
 * Round-trip check: parses the {@code xhtml-empty-elements.xml} test resource and
 * composes it again, expecting the output to equal the input once both are trimmed.
 */
@Test
public void testParseSvgElements() throws FHIRFormatError, IOException {
  final String original = BaseTestingUtilities.loadTestResource("xhtml", "xhtml-empty-elements.xml");
  final XhtmlNode parsed = new XhtmlParser().parse(original, "xml");
  final XhtmlComposer composer = new XhtmlComposer(false, false);
  final String recomposed = composer.compose(parsed);
  // Leading and trailing whitespace is ignored on both sides of the comparison.
  Assertions.assertEquals(original.trim(), recomposed.trim());
}
/**
 * Parses a local SVG/HTML file, re-composes it, and writes the result back over the
 * same file. There are no assertions; the test only checks that the round trip
 * completes without throwing.
 *
 * The file lives at a hard-coded absolute path on one developer's machine. When that
 * file is not present the method returns immediately, so the test does not fail on
 * other machines or in CI.
 */
@Test
public void testParseSvgF() throws FHIRFormatError, IOException {
  final String path = "/Users/grahamegrieve/work/r5/source/fhir-exchanges.svg.html";
  // Guard: nothing to do (and nothing to overwrite) when the local file is absent.
  if (!new java.io.File(path).exists()) {
    return;
  }
  String src = TextFile.fileToString(path);
  XhtmlNode x = new XhtmlParser().parse(src, "svg");
  String xml = new XhtmlComposer(false, true).compose(x);
  // NOTE(review): this overwrites the input file in place — confirm that is intended.
  TextFile.stringToFile(xml, path);
}
}