diff --git a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/InstanceValidator.java b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/InstanceValidator.java index 48ce27dfe..45397a8c7 100644 --- a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/InstanceValidator.java +++ b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/InstanceValidator.java @@ -33,14 +33,12 @@ package org.hl7.fhir.validation.instance; import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.math.BigDecimal; import java.net.URI; import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; @@ -57,7 +55,6 @@ import java.util.UUID; import javax.annotation.Nonnull; -import org.apache.commons.codec.binary.Base64InputStream; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.StringUtils; import org.fhir.ucum.Decimal; @@ -229,18 +226,7 @@ import org.hl7.fhir.validation.instance.type.StructureMapValidator; import org.hl7.fhir.validation.instance.type.StructureMapValidator.VariableDefn; import org.hl7.fhir.validation.instance.type.StructureMapValidator.VariableSet; import org.hl7.fhir.validation.instance.type.ValueSetValidator; -import org.hl7.fhir.validation.instance.utils.CanonicalResourceLookupResult; -import org.hl7.fhir.validation.instance.utils.CanonicalTypeSorter; -import org.hl7.fhir.validation.instance.utils.ChildIterator; -import org.hl7.fhir.validation.instance.utils.ElementInfo; -import org.hl7.fhir.validation.instance.utils.EnableWhenEvaluator; -import org.hl7.fhir.validation.instance.utils.FHIRPathExpressionFixer; -import org.hl7.fhir.validation.instance.utils.IndexedElement; -import org.hl7.fhir.validation.instance.utils.NodeStack; -import org.hl7.fhir.validation.instance.utils.ResolvedReference; -import org.hl7.fhir.validation.instance.utils.ResourceValidationTracker; -import org.hl7.fhir.validation.instance.utils.StructureDefinitionSorterByUrl; -import org.hl7.fhir.validation.instance.utils.ValidationContext; +import org.hl7.fhir.validation.instance.utils.*; import org.w3c.dom.Document; /** @@ -2848,12 +2834,12 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat if (type.equals("base64Binary")) { String encoded = e.primitiveValue(); if (isNotBlank(encoded)) { - boolean bok = isValidBase64(encoded); + boolean bok = Base64Util.isValidBase64(encoded); if (!bok) { String value = encoded.length() < 100 ? encoded : "(snip)"; ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, false, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_VALID, value) && ok; } else { - boolean wsok = !base64HasWhitespace(encoded); + boolean wsok = !Base64Util.base64HasWhitespace(encoded); if (VersionUtilities.isR5VerOrLater(this.context.getVersion())) { ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, wsok, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_NO_WS_ERROR) && ok; } else { @@ -2861,7 +2847,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat } } if (bok && context.hasExtension(ToolingExtensions.EXT_MAX_SIZE)) { - int size = countBase64DecodedBytes(encoded); + int size = Base64Util.countBase64DecodedBytes(encoded); long def = Long.parseLong(ToolingExtensions.readStringExtension(context, ToolingExtensions.EXT_MAX_SIZE)); ok = rule(errors, NO_RULE_DATE, IssueType.STRUCTURE, e.line(), e.col(), path, size <= def, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_TOO_LONG, size, def) && ok; } @@ -3280,70 +3266,6 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat return false; } - /** - * Technically this is not bulletproof as some invalid base64 won't be caught, - * but I think it's good enough. The original code used Java8 Base64 decoder - * but I've replaced it with a regex for 2 reasons: - * 1. This code will run on any version of Java - * 2. This code doesn't actually decode, which is much easier on memory use for big payloads - */ - private boolean isValidBase64(String theEncoded) { - if (theEncoded == null) { - return false; - } - int charCount = 0; - boolean ok = true; - for (int i = 0; i < theEncoded.length(); i++) { - char nextChar = theEncoded.charAt(i); - if (Utilities.isWhitespace(nextChar)) { - continue; - } - if (Character.isLetterOrDigit(nextChar)) { - charCount++; - } - if (nextChar == '/' || nextChar == '=' || nextChar == '+') { - charCount++; - } - } - - if (charCount > 0 && charCount % 4 != 0) { - ok = false; - } - return ok; - } - - private boolean base64HasWhitespace(String theEncoded) { - if (theEncoded == null) { - return false; - } - for (int i = 0; i < theEncoded.length(); i++) { - char nextChar = theEncoded.charAt(i); - if (Utilities.isWhitespace(nextChar)) { - return true; - } - } - return false; - - } - - - private int countBase64DecodedBytes(String theEncoded) { - Base64InputStream inputStream = new Base64InputStream(new ByteArrayInputStream(theEncoded.getBytes(StandardCharsets.UTF_8))); - try { - try { - for (int counter = 0; ; counter++) { - if (inputStream.read() == -1) { - return counter; - } - } - } finally { - inputStream.close(); - } - } catch (IOException e) { - throw new IllegalStateException(e); // should not happen - } - } - private boolean isDefinitionURL(String url) { return Utilities.existsInList(url, @@ -3498,10 +3420,10 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat for (XhtmlNode node : list) { if (node.getNodeType() == NodeType.Element) { if ("a".equals(node.getName())) { - String msg = checkValidUrl(node.getAttribute("href")); + String msg = UrlUtil.checkValidUrl(node.getAttribute("href"), context); ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, msg == null, I18nConstants.XHTML_URL_INVALID, node.getAttribute("href"), msg) && ok; } else if ("img".equals(node.getName())) { - String msg = checkValidUrl(node.getAttribute("src")); + String msg = UrlUtil.checkValidUrl(node.getAttribute("src"), context); ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, msg == null, I18nConstants.XHTML_URL_INVALID, node.getAttribute("src"), msg) && ok; } ok = checkUrls(errors, e, path, node.getChildNodes()) && ok; @@ -3510,56 +3432,6 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat return ok; } - private String checkValidUrl(String value) { - if (value == null) { - return null; - } - if (Utilities.noString(value)) { - return context.formatMessage(I18nConstants.XHTML_URL_EMPTY); - } - - if (value.startsWith("data:")) { - String[] p = value.substring(5).split("\\,"); - if (p.length < 2) { - return context.formatMessage(I18nConstants.XHTML_URL_DATA_NO_DATA, value); - } else if (p.length > 2) { - return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID_COMMA, value); - } else if (!p[0].endsWith(";base64") || !isValidBase64(p[1])) { - return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID, value); - } else { - if (p[0].startsWith(" ")) { - p[0] = Utilities.trimWS(p[0]); - } - String mMsg = checkValidMimeType(p[0].substring(0, p[0].lastIndexOf(";"))); - if (mMsg != null) { - return context.formatMessage(I18nConstants.XHTML_URL_DATA_MIMETYPE, value, mMsg); - } - } - return null; - } else { - Set invalidChars = new HashSet<>(); - int c = 0; - for (char ch : value.toCharArray()) { - if (!(Character.isDigit(ch) || Character.isAlphabetic(ch) || Utilities.existsInList(ch, ';', '?', ':', '@', '&', '=', '+', '$', '.', ',', '/', '%', '-', '_', '~', '#', '[', ']', '!', '\'', '(', ')', '*', '|' ))) { - c++; - invalidChars.add(ch); - } - } - if (invalidChars.isEmpty()) { - return null; - } else { - return context.formatMessagePlural(c, I18nConstants.XHTML_URL_INVALID_CHARS, invalidChars.toString()); - } - } - } - - private String checkValidMimeType(String mt) { - if (!mt.matches("^(\\w+|\\*)\\/(\\w+|\\*)((;\\s*(\\w+)=\\s*(\\S+))?)$")) { - return "Mime type invalid"; - } - return null; - } - private boolean checkInnerNS(List errors, Element e, String path, List list) { boolean ok = true; for (XhtmlNode node : list) { @@ -3826,9 +3698,9 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat String b64 = element.getChildValue("data"); // Note: If the value isn't valid, we're not adding an error here, as the test to the // child Base64Binary will catch it and we don't want to log it twice - boolean bok = isValidBase64(b64); + boolean bok = Base64Util.isValidBase64(b64); if (bok && element.hasChild("size", false)) { - size = countBase64DecodedBytes(b64); + size = Base64Util.countBase64DecodedBytes(b64); String sz = element.getChildValue("size"); ok = rule(errors, NO_RULE_DATE, IssueType.STRUCTURE, element.line(), element.col(), path, Long.toString(size).equals(sz), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_ATT_SIZE_CORRECT, sz, size) && ok; } diff --git a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/Base64Util.java b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/Base64Util.java new file mode 100644 index 000000000..16cbab452 --- /dev/null +++ b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/Base64Util.java @@ -0,0 +1,73 @@ +package org.hl7.fhir.validation.instance.utils; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import org.apache.commons.codec.binary.Base64InputStream; + +import org.hl7.fhir.utilities.Utilities; + +public class Base64Util { + /** + * Technically this is not bulletproof as some invalid base64 won't be caught, + * but I think it's good enough. The original code used Java8 Base64 decoder + * but I've replaced it with a regex for 2 reasons: + * 1. This code will run on any version of Java + * 2. This code doesn't actually decode, which is much easier on memory use for big payloads + */ + public static boolean isValidBase64(String theEncoded) { + if (theEncoded == null) { + return false; + } + int charCount = 0; + boolean ok = true; + for (int i = 0; i < theEncoded.length(); i++) { + char nextChar = theEncoded.charAt(i); + if (Utilities.isWhitespace(nextChar)) { + continue; + } + if (Character.isLetterOrDigit(nextChar)) { + charCount++; + } + if (nextChar == '/' || nextChar == '=' || nextChar == '+') { + charCount++; + } + } + + if (charCount > 0 && charCount % 4 != 0) { + ok = false; + } + return ok; + } + + public static boolean base64HasWhitespace(String theEncoded) { + if (theEncoded == null) { + return false; + } + for (int i = 0; i < theEncoded.length(); i++) { + char nextChar = theEncoded.charAt(i); + if (Utilities.isWhitespace(nextChar)) { + return true; + } + } + return false; + } + + public static int countBase64DecodedBytes(String theEncoded) { + Base64InputStream inputStream = new Base64InputStream(new ByteArrayInputStream(theEncoded.getBytes(StandardCharsets.UTF_8))); + try { + try { + for (int counter = 0; ; counter++) { + if (inputStream.read() == -1) { + return counter; + } + } + } finally { + inputStream.close(); + } + } catch (IOException e) { + throw new IllegalStateException(e); // should not happen + } + } + +} diff --git a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/MimeTypeUtil.java b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/MimeTypeUtil.java new file mode 100644 index 000000000..4f3bfd83e --- /dev/null +++ b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/MimeTypeUtil.java @@ -0,0 +1,10 @@ +package org.hl7.fhir.validation.instance.utils; + +public class MimeTypeUtil { + public static String checkValidMimeType(String mt) { + if (!mt.matches("^(\\w+|\\*)/([\\w-+]+|\\*)((;\\s*(\\w+)=\\s*(\\S+))?)$")) { + return "Mime type invalid"; + } + return null; + } +} diff --git a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/UrlUtil.java b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/UrlUtil.java new file mode 100644 index 000000000..2568c4640 --- /dev/null +++ b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/UrlUtil.java @@ -0,0 +1,57 @@ +package org.hl7.fhir.validation.instance.utils; + +import java.util.HashSet; +import java.util.Set; + +import org.hl7.fhir.r5.context.IWorkerContext; +import org.hl7.fhir.utilities.i18n.I18nConstants; +import org.hl7.fhir.utilities.Utilities; +import org.hl7.fhir.validation.instance.utils.Base64Util; + +public class UrlUtil { + public static String checkValidUrl(String value, IWorkerContext context) { + if (value == null) { + return null; + } + if (Utilities.noString(value)) { + return context.formatMessage(I18nConstants.XHTML_URL_EMPTY); + } + + if (value.startsWith("data:")) { + String[] p = value.substring(5).split("\\,"); + if (p.length < 2) { + return context.formatMessage(I18nConstants.XHTML_URL_DATA_NO_DATA, value); + } else if (p.length > 2) { + return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID_COMMA, value); + } else if (p[0].endsWith(";base64") && !Base64Util.isValidBase64(p[1])) { + return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID, value); + } else { + if (p[0].startsWith(" ")) { + p[0] = Utilities.trimWS(p[0]); + } + String mimetype = (p[0].endsWith(";base64")) ? p[0].substring(0, p[0].lastIndexOf(";")) : p[0]; + if (!mimetype.trim().isEmpty()) { + String mMsg = MimeTypeUtil.checkValidMimeType(mimetype); + if (mMsg != null) { + return context.formatMessage(I18nConstants.XHTML_URL_DATA_MIMETYPE, value, mMsg); + } + } + } + return null; + } else { + Set invalidChars = new HashSet<>(); + int c = 0; + for (char ch : value.toCharArray()) { + if (!(Character.isDigit(ch) || Character.isAlphabetic(ch) || Utilities.existsInList(ch, ';', '?', ':', '@', '&', '=', '+', '$', '.', ',', '/', '%', '-', '_', '~', '#', '[', ']', '!', '\'', '(', ')', '*', '|' ))) { + c++; + invalidChars.add(ch); + } + } + if (invalidChars.isEmpty()) { + return null; + } else { + return context.formatMessagePlural(c, I18nConstants.XHTML_URL_INVALID_CHARS, invalidChars.toString()); + } + } + } +} diff --git a/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/Base64UtilTest.java b/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/Base64UtilTest.java new file mode 100644 index 000000000..557f4d82b --- /dev/null +++ b/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/Base64UtilTest.java @@ -0,0 +1,58 @@ +package org.hl7.fhir.validation.instance.utils; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.params.provider.CsvSource; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class Base64UtilTest { + + @ParameterizedTest + @ValueSource(strings = { + "AAAA", + "AAAABBBB", + "AA AA BB BB ", + "ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789 /+====" + }) + public void testValidBase64(String base64) { + assertTrue(Base64Util.isValidBase64(base64)); + } + + @ParameterizedTest + @ValueSource(strings = { + "A==", // Improperly padded + "A(B=", // Invalid character in the default encoding + }) + public void testInvalidBase64(String base64) { + assertFalse(Base64Util.isValidBase64(base64)); + } + + @ParameterizedTest + @ValueSource(strings = { + "AA AA", + "AA AA", + "AAAA ", + "AA\nAA", + }) + public void testBase64WithWhitespace(String base64) { + assertTrue(Base64Util.base64HasWhitespace(base64)); + } + + public void testBase64WithoutWhitespace(String base64) { + assertFalse(Base64Util.base64HasWhitespace("AAAA")); + } + + @ParameterizedTest + @CsvSource({ + "AAAA, 3", + "AAAABBBB, 6", + "AAA=, 2", + }) + public void testValidBase64(String base64, int length) { + assertEquals(Base64Util.countBase64DecodedBytes(base64), length); + } + +} diff --git a/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/MimeTypeUtilTest.java b/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/MimeTypeUtilTest.java new file mode 100644 index 000000000..8e1be71c9 --- /dev/null +++ b/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/MimeTypeUtilTest.java @@ -0,0 +1,40 @@ +package org.hl7.fhir.validation.instance.utils; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; + +public class MimeTypeUtilTest { + + @ParameterizedTest + @ValueSource(strings = { + "application/fhir", + "text/plain", + "text/plain;charset=UTF-8", + "application/octet-stream", + "application/xhtml+xml", + }) + public void testValidMimeTypes(String mimeType) { + testMimeType(mimeType, true); + } + + @ParameterizedTest + @ValueSource(strings = { + "application/fhir;anything", // semicolon and everything after shouldn't work if it's not a parameter=value pair + "application/fhir;parameter=", // parameter without value shouldn't work. + }) + public void testInvalidMimeTypes(String mimeType) { + testMimeType(mimeType, false); + } + + private static void testMimeType(String mimeType, boolean valid) { + String result = MimeTypeUtil.checkValidMimeType(mimeType); + if (valid) { + assertNull(result); + } else { + assertNotNull(result); + } + } +} diff --git a/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/UrlUtilTest.java b/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/UrlUtilTest.java new file mode 100644 index 000000000..552275780 --- /dev/null +++ b/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/validation/instance/utils/UrlUtilTest.java @@ -0,0 +1,47 @@ +package org.hl7.fhir.validation.instance.utils; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; + +import org.hl7.fhir.r5.context.IWorkerContext; +import org.hl7.fhir.r5.test.utils.TestingUtilities; + +public class UrlUtilTest { + private IWorkerContext context; + + public UrlUtilTest() { + context = TestingUtilities.getSharedWorkerContext(); + } + + @ParameterizedTest + @ValueSource(strings = { + "data:text/plain,FHIR", + "data:,Hello%2C%20World%21", + "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==", + "data:text/plain;charset=UTF-8,FHIR", + "data:application/octet-stream;base64,RkhJUg==", + "http://hl7.org/fhir", + "https://hl7.org/fhir", + "https://en.wikipedia.org/w/index.php?title=Fast_Healthcare_Interoperability_Resources", + }) + public void testValidUrl(String url) { + assertNull(UrlUtil.checkValidUrl(url, context)); + } + + @ParameterizedTest + @ValueSource(strings = { + "", + "data:text/plain", // Actual data is missing + "data:application/octet-stream;base64", // Actual data is missing + "data:application/octet-stream;base64,A==", // Improperly padded base64 data + "data:application/octet-stream;base64,A(B=", // Invalid character in the default encoding + "data:application/octet-stream;base64AAAA", // Missing comma in data URL + }) + public void testInvalidUrl(String url) { + assertNotNull(UrlUtil.checkValidUrl(url, context)); + } + +}