Issue #1658: recognize that mimetype subtypes can contain dashes (#1664)

* Issue #1658: recognize that mimetype subtypes can contain dashes

* Simple refactor + test case

* Allow plus sign in mimetype subtype, expand tests

* Break out base64 test and url test methods to util classes

* Add additional negative tests for data: URLs

---------

Co-authored-by: dotasek <david.otasek@smilecdr.com>
This commit is contained in:
Pieter Edelman 2024-06-26 15:09:46 +02:00 committed by GitHub
parent 94937c9d30
commit cb330b4029
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 293 additions and 136 deletions

View File

@ -33,14 +33,12 @@ package org.hl7.fhir.validation.instance;
import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank;
import java.io.ByteArrayInputStream;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Calendar; import java.util.Calendar;
@ -57,7 +55,6 @@ import java.util.UUID;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.fhir.ucum.Decimal; import org.fhir.ucum.Decimal;
@ -229,18 +226,7 @@ import org.hl7.fhir.validation.instance.type.StructureMapValidator;
import org.hl7.fhir.validation.instance.type.StructureMapValidator.VariableDefn; import org.hl7.fhir.validation.instance.type.StructureMapValidator.VariableDefn;
import org.hl7.fhir.validation.instance.type.StructureMapValidator.VariableSet; import org.hl7.fhir.validation.instance.type.StructureMapValidator.VariableSet;
import org.hl7.fhir.validation.instance.type.ValueSetValidator; import org.hl7.fhir.validation.instance.type.ValueSetValidator;
import org.hl7.fhir.validation.instance.utils.CanonicalResourceLookupResult; import org.hl7.fhir.validation.instance.utils.*;
import org.hl7.fhir.validation.instance.utils.CanonicalTypeSorter;
import org.hl7.fhir.validation.instance.utils.ChildIterator;
import org.hl7.fhir.validation.instance.utils.ElementInfo;
import org.hl7.fhir.validation.instance.utils.EnableWhenEvaluator;
import org.hl7.fhir.validation.instance.utils.FHIRPathExpressionFixer;
import org.hl7.fhir.validation.instance.utils.IndexedElement;
import org.hl7.fhir.validation.instance.utils.NodeStack;
import org.hl7.fhir.validation.instance.utils.ResolvedReference;
import org.hl7.fhir.validation.instance.utils.ResourceValidationTracker;
import org.hl7.fhir.validation.instance.utils.StructureDefinitionSorterByUrl;
import org.hl7.fhir.validation.instance.utils.ValidationContext;
import org.w3c.dom.Document; import org.w3c.dom.Document;
/** /**
@ -2848,12 +2834,12 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
if (type.equals("base64Binary")) { if (type.equals("base64Binary")) {
String encoded = e.primitiveValue(); String encoded = e.primitiveValue();
if (isNotBlank(encoded)) { if (isNotBlank(encoded)) {
boolean bok = isValidBase64(encoded); boolean bok = Base64Util.isValidBase64(encoded);
if (!bok) { if (!bok) {
String value = encoded.length() < 100 ? encoded : "(snip)"; String value = encoded.length() < 100 ? encoded : "(snip)";
ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, false, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_VALID, value) && ok; ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, false, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_VALID, value) && ok;
} else { } else {
boolean wsok = !base64HasWhitespace(encoded); boolean wsok = !Base64Util.base64HasWhitespace(encoded);
if (VersionUtilities.isR5VerOrLater(this.context.getVersion())) { if (VersionUtilities.isR5VerOrLater(this.context.getVersion())) {
ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, wsok, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_NO_WS_ERROR) && ok; ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, wsok, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_NO_WS_ERROR) && ok;
} else { } else {
@ -2861,7 +2847,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
} }
} }
if (bok && context.hasExtension(ToolingExtensions.EXT_MAX_SIZE)) { if (bok && context.hasExtension(ToolingExtensions.EXT_MAX_SIZE)) {
int size = countBase64DecodedBytes(encoded); int size = Base64Util.countBase64DecodedBytes(encoded);
long def = Long.parseLong(ToolingExtensions.readStringExtension(context, ToolingExtensions.EXT_MAX_SIZE)); long def = Long.parseLong(ToolingExtensions.readStringExtension(context, ToolingExtensions.EXT_MAX_SIZE));
ok = rule(errors, NO_RULE_DATE, IssueType.STRUCTURE, e.line(), e.col(), path, size <= def, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_TOO_LONG, size, def) && ok; ok = rule(errors, NO_RULE_DATE, IssueType.STRUCTURE, e.line(), e.col(), path, size <= def, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_TOO_LONG, size, def) && ok;
} }
@ -3280,70 +3266,6 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
return false; return false;
} }
/**
* Technically this is not bulletproof as some invalid base64 won't be caught,
* but I think it's good enough. The original code used Java8 Base64 decoder
* but I've replaced it with a regex for 2 reasons:
* 1. This code will run on any version of Java
* 2. This code doesn't actually decode, which is much easier on memory use for big payloads
*/
private boolean isValidBase64(String theEncoded) {
if (theEncoded == null) {
return false;
}
int charCount = 0;
boolean ok = true;
for (int i = 0; i < theEncoded.length(); i++) {
char nextChar = theEncoded.charAt(i);
if (Utilities.isWhitespace(nextChar)) {
continue;
}
if (Character.isLetterOrDigit(nextChar)) {
charCount++;
}
if (nextChar == '/' || nextChar == '=' || nextChar == '+') {
charCount++;
}
}
if (charCount > 0 && charCount % 4 != 0) {
ok = false;
}
return ok;
}
private boolean base64HasWhitespace(String theEncoded) {
if (theEncoded == null) {
return false;
}
for (int i = 0; i < theEncoded.length(); i++) {
char nextChar = theEncoded.charAt(i);
if (Utilities.isWhitespace(nextChar)) {
return true;
}
}
return false;
}
private int countBase64DecodedBytes(String theEncoded) {
Base64InputStream inputStream = new Base64InputStream(new ByteArrayInputStream(theEncoded.getBytes(StandardCharsets.UTF_8)));
try {
try {
for (int counter = 0; ; counter++) {
if (inputStream.read() == -1) {
return counter;
}
}
} finally {
inputStream.close();
}
} catch (IOException e) {
throw new IllegalStateException(e); // should not happen
}
}
private boolean isDefinitionURL(String url) { private boolean isDefinitionURL(String url) {
return Utilities.existsInList(url, return Utilities.existsInList(url,
@ -3498,10 +3420,10 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
for (XhtmlNode node : list) { for (XhtmlNode node : list) {
if (node.getNodeType() == NodeType.Element) { if (node.getNodeType() == NodeType.Element) {
if ("a".equals(node.getName())) { if ("a".equals(node.getName())) {
String msg = checkValidUrl(node.getAttribute("href")); String msg = UrlUtil.checkValidUrl(node.getAttribute("href"), context);
ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, msg == null, I18nConstants.XHTML_URL_INVALID, node.getAttribute("href"), msg) && ok; ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, msg == null, I18nConstants.XHTML_URL_INVALID, node.getAttribute("href"), msg) && ok;
} else if ("img".equals(node.getName())) { } else if ("img".equals(node.getName())) {
String msg = checkValidUrl(node.getAttribute("src")); String msg = UrlUtil.checkValidUrl(node.getAttribute("src"), context);
ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, msg == null, I18nConstants.XHTML_URL_INVALID, node.getAttribute("src"), msg) && ok; ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, msg == null, I18nConstants.XHTML_URL_INVALID, node.getAttribute("src"), msg) && ok;
} }
ok = checkUrls(errors, e, path, node.getChildNodes()) && ok; ok = checkUrls(errors, e, path, node.getChildNodes()) && ok;
@ -3510,56 +3432,6 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
return ok; return ok;
} }
private String checkValidUrl(String value) {
if (value == null) {
return null;
}
if (Utilities.noString(value)) {
return context.formatMessage(I18nConstants.XHTML_URL_EMPTY);
}
if (value.startsWith("data:")) {
String[] p = value.substring(5).split("\\,");
if (p.length < 2) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_NO_DATA, value);
} else if (p.length > 2) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID_COMMA, value);
} else if (!p[0].endsWith(";base64") || !isValidBase64(p[1])) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID, value);
} else {
if (p[0].startsWith(" ")) {
p[0] = Utilities.trimWS(p[0]);
}
String mMsg = checkValidMimeType(p[0].substring(0, p[0].lastIndexOf(";")));
if (mMsg != null) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_MIMETYPE, value, mMsg);
}
}
return null;
} else {
Set<Character> invalidChars = new HashSet<>();
int c = 0;
for (char ch : value.toCharArray()) {
if (!(Character.isDigit(ch) || Character.isAlphabetic(ch) || Utilities.existsInList(ch, ';', '?', ':', '@', '&', '=', '+', '$', '.', ',', '/', '%', '-', '_', '~', '#', '[', ']', '!', '\'', '(', ')', '*', '|' ))) {
c++;
invalidChars.add(ch);
}
}
if (invalidChars.isEmpty()) {
return null;
} else {
return context.formatMessagePlural(c, I18nConstants.XHTML_URL_INVALID_CHARS, invalidChars.toString());
}
}
}
private String checkValidMimeType(String mt) {
if (!mt.matches("^(\\w+|\\*)\\/(\\w+|\\*)((;\\s*(\\w+)=\\s*(\\S+))?)$")) {
return "Mime type invalid";
}
return null;
}
private boolean checkInnerNS(List<ValidationMessage> errors, Element e, String path, List<XhtmlNode> list) { private boolean checkInnerNS(List<ValidationMessage> errors, Element e, String path, List<XhtmlNode> list) {
boolean ok = true; boolean ok = true;
for (XhtmlNode node : list) { for (XhtmlNode node : list) {
@ -3826,9 +3698,9 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
String b64 = element.getChildValue("data"); String b64 = element.getChildValue("data");
// Note: If the value isn't valid, we're not adding an error here, as the test to the // Note: If the value isn't valid, we're not adding an error here, as the test to the
// child Base64Binary will catch it and we don't want to log it twice // child Base64Binary will catch it and we don't want to log it twice
boolean bok = isValidBase64(b64); boolean bok = Base64Util.isValidBase64(b64);
if (bok && element.hasChild("size", false)) { if (bok && element.hasChild("size", false)) {
size = countBase64DecodedBytes(b64); size = Base64Util.countBase64DecodedBytes(b64);
String sz = element.getChildValue("size"); String sz = element.getChildValue("size");
ok = rule(errors, NO_RULE_DATE, IssueType.STRUCTURE, element.line(), element.col(), path, Long.toString(size).equals(sz), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_ATT_SIZE_CORRECT, sz, size) && ok; ok = rule(errors, NO_RULE_DATE, IssueType.STRUCTURE, element.line(), element.col(), path, Long.toString(size).equals(sz), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_ATT_SIZE_CORRECT, sz, size) && ok;
} }

View File

@ -0,0 +1,73 @@
package org.hl7.fhir.validation.instance.utils;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.commons.codec.binary.Base64InputStream;
import org.hl7.fhir.utilities.Utilities;
public class Base64Util {
/**
* Technically this is not bulletproof as some invalid base64 won't be caught,
* but I think it's good enough. The original code used Java8 Base64 decoder
* but I've replaced it with a regex for 2 reasons:
* 1. This code will run on any version of Java
* 2. This code doesn't actually decode, which is much easier on memory use for big payloads
*/
public static boolean isValidBase64(String theEncoded) {
if (theEncoded == null) {
return false;
}
int charCount = 0;
boolean ok = true;
for (int i = 0; i < theEncoded.length(); i++) {
char nextChar = theEncoded.charAt(i);
if (Utilities.isWhitespace(nextChar)) {
continue;
}
if (Character.isLetterOrDigit(nextChar)) {
charCount++;
}
if (nextChar == '/' || nextChar == '=' || nextChar == '+') {
charCount++;
}
}
if (charCount > 0 && charCount % 4 != 0) {
ok = false;
}
return ok;
}
public static boolean base64HasWhitespace(String theEncoded) {
if (theEncoded == null) {
return false;
}
for (int i = 0; i < theEncoded.length(); i++) {
char nextChar = theEncoded.charAt(i);
if (Utilities.isWhitespace(nextChar)) {
return true;
}
}
return false;
}
public static int countBase64DecodedBytes(String theEncoded) {
Base64InputStream inputStream = new Base64InputStream(new ByteArrayInputStream(theEncoded.getBytes(StandardCharsets.UTF_8)));
try {
try {
for (int counter = 0; ; counter++) {
if (inputStream.read() == -1) {
return counter;
}
}
} finally {
inputStream.close();
}
} catch (IOException e) {
throw new IllegalStateException(e); // should not happen
}
}
}

View File

@ -0,0 +1,10 @@
package org.hl7.fhir.validation.instance.utils;
public class MimeTypeUtil {
public static String checkValidMimeType(String mt) {
if (!mt.matches("^(\\w+|\\*)/([\\w-+]+|\\*)((;\\s*(\\w+)=\\s*(\\S+))?)$")) {
return "Mime type invalid";
}
return null;
}
}

View File

@ -0,0 +1,57 @@
package org.hl7.fhir.validation.instance.utils;
import java.util.HashSet;
import java.util.Set;
import org.hl7.fhir.r5.context.IWorkerContext;
import org.hl7.fhir.utilities.i18n.I18nConstants;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.validation.instance.utils.Base64Util;
public class UrlUtil {
public static String checkValidUrl(String value, IWorkerContext context) {
if (value == null) {
return null;
}
if (Utilities.noString(value)) {
return context.formatMessage(I18nConstants.XHTML_URL_EMPTY);
}
if (value.startsWith("data:")) {
String[] p = value.substring(5).split("\\,");
if (p.length < 2) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_NO_DATA, value);
} else if (p.length > 2) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID_COMMA, value);
} else if (p[0].endsWith(";base64") && !Base64Util.isValidBase64(p[1])) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID, value);
} else {
if (p[0].startsWith(" ")) {
p[0] = Utilities.trimWS(p[0]);
}
String mimetype = (p[0].endsWith(";base64")) ? p[0].substring(0, p[0].lastIndexOf(";")) : p[0];
if (!mimetype.trim().isEmpty()) {
String mMsg = MimeTypeUtil.checkValidMimeType(mimetype);
if (mMsg != null) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_MIMETYPE, value, mMsg);
}
}
}
return null;
} else {
Set<Character> invalidChars = new HashSet<>();
int c = 0;
for (char ch : value.toCharArray()) {
if (!(Character.isDigit(ch) || Character.isAlphabetic(ch) || Utilities.existsInList(ch, ';', '?', ':', '@', '&', '=', '+', '$', '.', ',', '/', '%', '-', '_', '~', '#', '[', ']', '!', '\'', '(', ')', '*', '|' ))) {
c++;
invalidChars.add(ch);
}
}
if (invalidChars.isEmpty()) {
return null;
} else {
return context.formatMessagePlural(c, I18nConstants.XHTML_URL_INVALID_CHARS, invalidChars.toString());
}
}
}
}

View File

@ -0,0 +1,58 @@
package org.hl7.fhir.validation.instance.utils;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.junit.jupiter.params.provider.CsvSource;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class Base64UtilTest {
@ParameterizedTest
@ValueSource(strings = {
"AAAA",
"AAAABBBB",
"AA AA BB BB ",
"ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789 /+===="
})
public void testValidBase64(String base64) {
assertTrue(Base64Util.isValidBase64(base64));
}
@ParameterizedTest
@ValueSource(strings = {
"A==", // Improperly padded
"A(B=", // Invalid character in the default encoding
})
public void testInvalidBase64(String base64) {
assertFalse(Base64Util.isValidBase64(base64));
}
@ParameterizedTest
@ValueSource(strings = {
"AA AA",
"AA AA",
"AAAA ",
"AA\nAA",
})
public void testBase64WithWhitespace(String base64) {
assertTrue(Base64Util.base64HasWhitespace(base64));
}
public void testBase64WithoutWhitespace(String base64) {
assertFalse(Base64Util.base64HasWhitespace("AAAA"));
}
@ParameterizedTest
@CsvSource({
"AAAA, 3",
"AAAABBBB, 6",
"AAA=, 2",
})
public void testValidBase64(String base64, int length) {
assertEquals(Base64Util.countBase64DecodedBytes(base64), length);
}
}

View File

@ -0,0 +1,40 @@
package org.hl7.fhir.validation.instance.utils;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
public class MimeTypeUtilTest {
@ParameterizedTest
@ValueSource(strings = {
"application/fhir",
"text/plain",
"text/plain;charset=UTF-8",
"application/octet-stream",
"application/xhtml+xml",
})
public void testValidMimeTypes(String mimeType) {
testMimeType(mimeType, true);
}
@ParameterizedTest
@ValueSource(strings = {
"application/fhir;anything", // semicolon and everything after shouldn't work if it's not a parameter=value pair
"application/fhir;parameter=", // parameter without value shouldn't work.
})
public void testInvalidMimeTypes(String mimeType) {
testMimeType(mimeType, false);
}
private static void testMimeType(String mimeType, boolean valid) {
String result = MimeTypeUtil.checkValidMimeType(mimeType);
if (valid) {
assertNull(result);
} else {
assertNotNull(result);
}
}
}

View File

@ -0,0 +1,47 @@
package org.hl7.fhir.validation.instance.utils;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import org.hl7.fhir.r5.context.IWorkerContext;
import org.hl7.fhir.r5.test.utils.TestingUtilities;
public class UrlUtilTest {
private IWorkerContext context;
public UrlUtilTest() {
context = TestingUtilities.getSharedWorkerContext();
}
@ParameterizedTest
@ValueSource(strings = {
"data:text/plain,FHIR",
"data:,Hello%2C%20World%21",
"data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==",
"data:text/plain;charset=UTF-8,FHIR",
"data:application/octet-stream;base64,RkhJUg==",
"http://hl7.org/fhir",
"https://hl7.org/fhir",
"https://en.wikipedia.org/w/index.php?title=Fast_Healthcare_Interoperability_Resources",
})
public void testValidUrl(String url) {
assertNull(UrlUtil.checkValidUrl(url, context));
}
@ParameterizedTest
@ValueSource(strings = {
"",
"data:text/plain", // Actual data is missing
"data:application/octet-stream;base64", // Actual data is missing
"data:application/octet-stream;base64,A==", // Improperly padded base64 data
"data:application/octet-stream;base64,A(B=", // Invalid character in the default encoding
"data:application/octet-stream;base64AAAA", // Missing comma in data URL
})
public void testInvalidUrl(String url) {
assertNotNull(UrlUtil.checkValidUrl(url, context));
}
}