From 0c26f0972118f3904d7fe9fd885fa27a36bfcc6e Mon Sep 17 00:00:00 2001 From: Grahame Grieve Date: Tue, 7 Mar 2023 06:25:45 +1100 Subject: [PATCH] revise whitespace handling for unicode conformance in validator --- .../hl7/fhir/dstu2/test/TestingUtilities.java | 2 +- .../dstu2016may/test/TestingUtilities.java | 2 +- .../dstu3/test/support/TestingUtilities.java | 2 +- .../fhir/r4/test/utils/TestingUtilities.java | 2 +- .../fhir/r4b/test/utils/TestingUtilities.java | 2 +- .../conformance/profile/ProfileUtilities.java | 2 +- .../fhir/r5/test/utils/CompareUtilities.java | 3 +- .../java/org/hl7/fhir/r5/utils/FHIRLexer.java | 2 +- .../org/hl7/fhir/utilities/Utilities.java | 78 ++++++++++++++----- .../fhir/utilities/i18n/I18nConstants.java | 1 + .../utilities/xhtml/CDANarrativeFormat.java | 3 +- .../xhtml/HierarchicalTableGenerator.java | 3 +- .../src/main/resources/Messages.properties | 1 + .../org/hl7/fhir/utilities/UtilitiesTest.java | 12 +++ .../instance/InstanceValidator.java | 37 +++------ .../utils/FHIRPathExpressionFixer.java | 3 + .../conversion/tests/UtilitiesXTests.java | 3 +- 17 files changed, 102 insertions(+), 56 deletions(-) diff --git a/org.hl7.fhir.dstu2/src/test/java/org/hl7/fhir/dstu2/test/TestingUtilities.java b/org.hl7.fhir.dstu2/src/test/java/org/hl7/fhir/dstu2/test/TestingUtilities.java index b1f8b2559..bcb79ab62 100644 --- a/org.hl7.fhir.dstu2/src/test/java/org/hl7/fhir/dstu2/test/TestingUtilities.java +++ b/org.hl7.fhir.dstu2/src/test/java/org/hl7/fhir/dstu2/test/TestingUtilities.java @@ -134,7 +134,7 @@ public class TestingUtilities { } private static Node skipBlankText(Node node) { - while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) + while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isAllWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) node = node.getNextSibling(); return node; } diff --git a/org.hl7.fhir.dstu2016may/src/test/java/org/hl7/fhir/dstu2016may/test/TestingUtilities.java b/org.hl7.fhir.dstu2016may/src/test/java/org/hl7/fhir/dstu2016may/test/TestingUtilities.java index 7f3e5a065..0d6bb172f 100644 --- a/org.hl7.fhir.dstu2016may/src/test/java/org/hl7/fhir/dstu2016may/test/TestingUtilities.java +++ b/org.hl7.fhir.dstu2016may/src/test/java/org/hl7/fhir/dstu2016may/test/TestingUtilities.java @@ -134,7 +134,7 @@ public class TestingUtilities { } private static Node skipBlankText(Node node) { - while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) + while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isAllWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) node = node.getNextSibling(); return node; } diff --git a/org.hl7.fhir.dstu3/src/test/java/org/hl7/fhir/dstu3/test/support/TestingUtilities.java b/org.hl7.fhir.dstu3/src/test/java/org/hl7/fhir/dstu3/test/support/TestingUtilities.java index 4371ea1a5..860617993 100644 --- a/org.hl7.fhir.dstu3/src/test/java/org/hl7/fhir/dstu3/test/support/TestingUtilities.java +++ b/org.hl7.fhir.dstu3/src/test/java/org/hl7/fhir/dstu3/test/support/TestingUtilities.java @@ -164,7 +164,7 @@ public class TestingUtilities extends BaseTestingUtilities { } private static Node skipBlankText(Node node) { - while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) + while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isAllWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) node = node.getNextSibling(); return node; } diff --git a/org.hl7.fhir.r4/src/main/java/org/hl7/fhir/r4/test/utils/TestingUtilities.java b/org.hl7.fhir.r4/src/main/java/org/hl7/fhir/r4/test/utils/TestingUtilities.java index 0a34c6e9c..d8d3d50ce 100644 --- a/org.hl7.fhir.r4/src/main/java/org/hl7/fhir/r4/test/utils/TestingUtilities.java +++ b/org.hl7.fhir.r4/src/main/java/org/hl7/fhir/r4/test/utils/TestingUtilities.java @@ -242,7 +242,7 @@ public class TestingUtilities { } private static Node skipBlankText(Node node) { - while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) + while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isAllWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) node = node.getNextSibling(); return node; } diff --git a/org.hl7.fhir.r4b/src/main/java/org/hl7/fhir/r4b/test/utils/TestingUtilities.java b/org.hl7.fhir.r4b/src/main/java/org/hl7/fhir/r4b/test/utils/TestingUtilities.java index b164f0cf1..cc5112fe0 100644 --- a/org.hl7.fhir.r4b/src/main/java/org/hl7/fhir/r4b/test/utils/TestingUtilities.java +++ b/org.hl7.fhir.r4b/src/main/java/org/hl7/fhir/r4b/test/utils/TestingUtilities.java @@ -257,7 +257,7 @@ public class TestingUtilities extends BaseTestingUtilities { } private static Node skipBlankText(Node node) { - while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) + while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isAllWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) node = node.getNextSibling(); return node; } diff --git a/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/conformance/profile/ProfileUtilities.java b/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/conformance/profile/ProfileUtilities.java index caf5ffa31..3864901c3 100644 --- a/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/conformance/profile/ProfileUtilities.java +++ b/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/conformance/profile/ProfileUtilities.java @@ -919,7 +919,7 @@ public class ProfileUtilities extends TranslatingUtilities { throw new FHIRException(context.formatMessage(I18nConstants.ILLEGAL_PATH__IN_DIFFERENTIAL_IN__NAME_PORTION_EXCEEDS_64_CHARS_IN_LENGTH, p, url)); } for (char ch : pp.toCharArray()) { - if (Character.isWhitespace(ch)) { + if (Utilities.isWhitespace(ch)) { throw new FHIRException(context.formatMessage(I18nConstants.ILLEGAL_PATH__IN_DIFFERENTIAL_IN__NO_UNICODE_WHITESPACE, p, url)); } if (Utilities.existsInList(ch, ',', ':', ';', '\'', '"', '/', '|', '?', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '{', '}')) { diff --git a/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/test/utils/CompareUtilities.java b/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/test/utils/CompareUtilities.java index 9471c138b..63c88095d 100644 --- a/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/test/utils/CompareUtilities.java +++ b/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/test/utils/CompareUtilities.java @@ -1,6 +1,7 @@ package org.hl7.fhir.r5.test.utils; import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; import org.hl7.fhir.utilities.CSFile; import org.hl7.fhir.utilities.TextFile; import org.hl7.fhir.utilities.ToolGlobalSettings; @@ -147,7 +148,7 @@ public class CompareUtilities extends BaseTestingUtilities { } private static Node skipBlankText(Node node) { - while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) + while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && StringUtils.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) node = node.getNextSibling(); return node; } diff --git a/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/utils/FHIRLexer.java b/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/utils/FHIRLexer.java index 4d550feff..f7df5b03e 100644 --- a/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/utils/FHIRLexer.java +++ b/org.hl7.fhir.r5/src/main/java/org/hl7/fhir/r5/utils/FHIRLexer.java @@ -338,7 +338,7 @@ public class FHIRLexer { comments.add(source.substring(start, cursor).trim()); cursor = cursor + 2; } - } else if (Character.isWhitespace(source.charAt(cursor))) { + } else if (Utilities.isWhitespace(source.charAt(cursor))) { last13 = currentLocation.checkChar(source.charAt(cursor), last13); cursor++; } else { diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/Utilities.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/Utilities.java index e07d6b7c7..ed50e9ad6 100644 --- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/Utilities.java +++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/Utilities.java @@ -826,7 +826,7 @@ public class Utilities { boolean isWhitespace = false; for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); - if (!Character.isWhitespace(c)) { + if (!isWhitespace(c)) { b.append(Character.toLowerCase(c)); isWhitespace = false; } else if (!isWhitespace) { @@ -861,15 +861,6 @@ public class Utilities { } - public static boolean isWhitespace(String s) { - boolean ok = true; - for (int i = 0; i < s.length(); i++) - ok = ok && Character.isWhitespace(s.charAt(i)); - return ok; - - } - - public static String URLEncode(String string) { try { return URLEncoder.encode(string, "UTF-8"); @@ -1002,7 +993,11 @@ public class Utilities { b.append("\\\""); else if (c == '\\') b.append("\\\\"); - else if (((int) c) < 32) + else if (c == ' ') + b.append(" "); + else if (isWhitespace(c)) { + b.append("\\u"+Integer.toHexString(c)); + } else if (((int) c) < 32) b.append("\\u" + Utilities.padLeft(String.valueOf((int) c), '0', 4)); else b.append(c); @@ -1086,15 +1081,15 @@ public class Utilities { int expectedByte = in1.read(); while (expectedByte != -1) { - boolean w1 = isWhitespace(expectedByte); + boolean w1 = Character.isWhitespace(expectedByte); if (w1) - while (isWhitespace(expectedByte)) + while (Character.isWhitespace(expectedByte)) expectedByte = in1.read(); int foundByte = in2.read(); if (w1) { - if (!isWhitespace(foundByte)) + if (!Character.isWhitespace(foundByte)) return false; - while (isWhitespace(foundByte)) + while (Character.isWhitespace(foundByte)) foundByte = in2.read(); } if (expectedByte != foundByte) @@ -1121,10 +1116,6 @@ public class Utilities { } } - private static boolean isWhitespace(int b) { - return b == 9 || b == 10 || b == 13 || b == 32; - } - public static boolean compareIgnoreWhitespace(String fn1, String fn2) throws IOException { return compareIgnoreWhitespace(new File(fn1), new File(fn2)); @@ -1880,5 +1871,54 @@ public class Utilities { public static boolean isValidCRName(String name) { return name != null && name.matches("[A-Z]([A-Za-z0-9_]){1,254}"); } + + public static boolean isAllWhitespace(String s) { + if (Utilities.noString(s)) { + return true; + } + for (char ch : s.toCharArray()) { + if (!isWhitespace(ch)) { + return false; + } + } + return true; + } + + public static String trimWS(String s) { + if (Utilities.noString(s)) { + return s; + } + int start = 0; + while (start < s.length() && isWhitespace(s.charAt(start))) { + start++; + } + if (start == s.length()) { + return ""; + } + int end = s.length() - 1; + while (end >= 0 && isWhitespace(s.charAt(end))) { + end--; + } + if (start > end) { + return ""; + } + return s.substring(start, end+1); + } + + // from https://en.wikipedia.org/wiki/Whitespace_character#Unicode + public static boolean isWhitespace(int ch) { + return Utilities.existsInList(ch, '\u0009', '\n', '\u000B','\u000C','\r','\u0020','\u0085','\u00A0', + '\u1680','\u2000','\u2001','\u2002','\u2003','\u2004','\u2005','\u2006','\u2007','\u2008','\u2009','\u200A', + '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'); + } + +//public static boolean !isWhitespace(String s) { +//boolean ok = true; +//for (int i = 0; i < s.length(); i++) +// ok = ok && Character.isWhitespace(s.charAt(i)); +//return ok; +// +//} + } diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/i18n/I18nConstants.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/i18n/I18nConstants.java index 5818006aa..f5288f393 100644 --- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/i18n/I18nConstants.java +++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/i18n/I18nConstants.java @@ -534,6 +534,7 @@ public class I18nConstants { public static final String TYPE_SPECIFIC_CHECKS_DT_QTY_NO_ANNOTATIONS = "TYPE_SPECIFIC_CHECKS_DT_QTY_NO_ANNOTATIONS"; public static final String TYPE_SPECIFIC_CHECKS_DT_STRING_LENGTH = "Type_Specific_Checks_DT_String_Length"; public static final String TYPE_SPECIFIC_CHECKS_DT_STRING_WS = "Type_Specific_Checks_DT_String_WS"; + public static final String TYPE_SPECIFIC_CHECKS_DT_STRING_WS_ALL = "Type_Specific_Checks_DT_String_WS_ALL"; public static final String TYPE_SPECIFIC_CHECKS_DT_TIME_VALID = "Type_Specific_Checks_DT_Time_Valid"; public static final String TYPE_SPECIFIC_CHECKS_DT_URI_OID = "Type_Specific_Checks_DT_URI_OID"; public static final String TYPE_SPECIFIC_CHECKS_DT_URI_UUID = "Type_Specific_Checks_DT_URI_UUID"; diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/xhtml/CDANarrativeFormat.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/xhtml/CDANarrativeFormat.java index 4472b899a..da8b47459 100644 --- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/xhtml/CDANarrativeFormat.java +++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/xhtml/CDANarrativeFormat.java @@ -33,6 +33,7 @@ package org.hl7.fhir.utilities.xhtml; import java.io.IOException; +import org.apache.commons.lang3.StringUtils; import org.hl7.fhir.exceptions.FHIRException; import org.hl7.fhir.utilities.Utilities; import org.hl7.fhir.utilities.xml.IXMLWriter; @@ -81,7 +82,7 @@ public class CDANarrativeFormat { xn.addComment(n.getTextContent()); return; case Node.TEXT_NODE: - if (!Utilities.isWhitespace(n.getTextContent())) + if (!StringUtils.isWhitespace(n.getTextContent())) xn.addText(n.getTextContent()); return; case Node.ELEMENT_NODE: diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/xhtml/HierarchicalTableGenerator.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/xhtml/HierarchicalTableGenerator.java index ddccd0ad5..e4a6d5d9e 100644 --- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/xhtml/HierarchicalTableGenerator.java +++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/xhtml/HierarchicalTableGenerator.java @@ -77,6 +77,7 @@ import javax.imageio.ImageIO; import org.apache.commons.codec.binary.Base64; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; import org.commonmark.node.Node; import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; @@ -303,7 +304,7 @@ public class HierarchicalTableGenerator extends TranslatingUtilities { myPieces.add(new Piece("br")); } if (c.getNodeType() == NodeType.Text) { - if (!Utilities.isWhitespace(c.getContent())) + if (!StringUtils.isWhitespace(c.getContent())) addNode(myPieces, c, style); } else if ("p".equals(c.getName())) { for (XhtmlNode g : c.getChildNodes()) { diff --git a/org.hl7.fhir.utilities/src/main/resources/Messages.properties b/org.hl7.fhir.utilities/src/main/resources/Messages.properties index fe96be755..7a038b115 100644 --- a/org.hl7.fhir.utilities/src/main/resources/Messages.properties +++ b/org.hl7.fhir.utilities/src/main/resources/Messages.properties @@ -218,6 +218,7 @@ Type_Specific_Checks_DT_Primitive_ValueExt = Primitive types must have a value o Type_Specific_Checks_DT_Primitive_WS = Primitive types should not only be whitespace Type_Specific_Checks_DT_String_Length = value is longer than permitted maximum length of 1 MB (1048576 bytes) Type_Specific_Checks_DT_String_WS = value should not start or finish with whitespace ''{0}'' +Type_Specific_Checks_DT_String_WS_ALL = value should not be all whitespace ''{0}'' Type_Specific_Checks_DT_Time_Valid = Not a valid time ({0}) Type_Specific_Checks_DT_URI_OID = URI values cannot start with oid: Type_Specific_Checks_DT_URI_UUID = URI values cannot start with uuid: diff --git a/org.hl7.fhir.utilities/src/test/java/org/hl7/fhir/utilities/UtilitiesTest.java b/org.hl7.fhir.utilities/src/test/java/org/hl7/fhir/utilities/UtilitiesTest.java index 98732e35c..02316c1fc 100644 --- a/org.hl7.fhir.utilities/src/test/java/org/hl7/fhir/utilities/UtilitiesTest.java +++ b/org.hl7.fhir.utilities/src/test/java/org/hl7/fhir/utilities/UtilitiesTest.java @@ -229,5 +229,17 @@ class UtilitiesTest { Assertions.assertEquals(17, Utilities.getDatePrecision("1900-06-06T14:00:00.000-10:00")); } + + @Test + @DisplayName("trimWS tests") + void testTrimWS() { + Assertions.assertEquals("", Utilities.trimWS("")); + Assertions.assertEquals("", Utilities.trimWS(" ")); + Assertions.assertEquals("t", Utilities.trimWS(" t ")); + Assertions.assertEquals(".", Utilities.trimWS("\r.")); + Assertions.assertEquals("# %", Utilities.trimWS("# %")); + Assertions.assertEquals("", Utilities.trimWS("\u0009\n\u000B\u000C\r\u0020\u0085\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000")); + } + } \ No newline at end of file diff --git a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/InstanceValidator.java b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/InstanceValidator.java index 11f75c27a..4bec122ea 100644 --- a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/InstanceValidator.java +++ b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/InstanceValidator.java @@ -2258,7 +2258,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, e.hasChildren(), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_PRIMITIVE_VALUEEXT) && ok; else if (e.primitiveValue().length() == 0) ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, e.hasChildren(), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_PRIMITIVE_NOTEMPTY) && ok; - else if (StringUtils.isWhitespace(e.primitiveValue())) + else if (Utilities.isAllWhitespace(e.primitiveValue())) warning(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, e.hasChildren(), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_PRIMITIVE_WS); if (context.hasBinding()) { ok = rule(errors, NO_RULE_DATE, IssueType.CODEINVALID, e.line(), e.col(), path, context.getBinding().getStrength() != BindingStrength.REQUIRED, I18nConstants.Terminology_TX_Code_ValueSet_MISSING) && ok; @@ -2306,7 +2306,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat String url = e.primitiveValue(); ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, !url.startsWith("oid:"), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_URI_OID) && ok; ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, !url.startsWith("uuid:"), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_URI_UUID) && ok; - ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, url.equals(url.trim().replace(" ", "")) + ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, url.equals(Utilities.trimWS(url).replace(" ", "")) // work around an old invalid example in a core package || "http://www.acme.com/identifiers/patient or urn:ietf:rfc:3986 if the Identifier.value itself is a full uri".equals(url), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_URI_WS, url) && ok; ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, !context.hasMaxLength() || context.getMaxLength() == 0 || url.length() <= context.getMaxLength(), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_PRIMITIVE_LENGTH, context.getMaxLength()) && ok; @@ -2353,7 +2353,9 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat } if (type.equalsIgnoreCase("string") && e.hasPrimitiveValue()) { if (rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, e.primitiveValue() == null || e.primitiveValue().length() > 0, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_PRIMITIVE_NOTEMPTY)) { - warning(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, e.primitiveValue() == null || e.primitiveValue().trim().equals(e.primitiveValue()), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_STRING_WS, prepWSPresentation(e.primitiveValue())); + if (warning(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, e.primitiveValue() == null || !Utilities.isAllWhitespace(e.primitiveValue()), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_STRING_WS_ALL, prepWSPresentation(e.primitiveValue()))) { + warning(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, e.primitiveValue() == null || Utilities.trimWS(e.primitiveValue()).equals(e.primitiveValue()), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_STRING_WS, prepWSPresentation(e.primitiveValue())); + } if (rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, e.primitiveValue().length() <= 1048576, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_STRING_LENGTH)) { ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, !context.hasMaxLength() || context.getMaxLength() == 0 || e.primitiveValue().length() <= context.getMaxLength(), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_PRIMITIVE_LENGTH, context.getMaxLength()) && ok; } else { @@ -2623,24 +2625,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat if (Utilities.noString(s)) { return ""; } - if (!StringUtils.containsWhitespace(s.trim())) { - return s; - } - int b = 0; - while (Character.isWhitespace(s.charAt(b))) { - b++; - } - while (!Character.isWhitespace(s.charAt(b))) { - b++; - } - int e = s.length() - 1; - while (Character.isWhitespace(s.charAt(e))) { - e--; - } - while (!Character.isWhitespace(s.charAt(e))) { - e--; - } - return s.substring(0, b)+"..."+s.substring(e+1); + return Utilities.escapeJson(s); } public boolean validateReference(ValidatorHostContext hostContext, List errors, String path, String type, ElementDefinition context, Element e, String url) { @@ -2803,7 +2788,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat boolean ok = true; for (int i = 0; i < theEncoded.length(); i++) { char nextChar = theEncoded.charAt(i); - if (Character.isWhitespace(nextChar)) { + if (Utilities.isWhitespace(nextChar)) { continue; } if (Character.isLetterOrDigit(nextChar)) { @@ -2826,7 +2811,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat } for (int i = 0; i < theEncoded.length(); i++) { char nextChar = theEncoded.charAt(i); - if (Character.isWhitespace(nextChar)) { + if (Utilities.isWhitespace(nextChar)) { return true; } } @@ -2930,7 +2915,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID, value); } else { if (p[0].startsWith(" ")) { - p[0] = p[0].trim(); + p[0] = Utilities.trimWS(p[0]); } String mMsg = checkValidMimeType(p[0].substring(0, p[0].lastIndexOf(";"))); if (mMsg != null) { @@ -4039,7 +4024,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat } private boolean passesCodeWhitespaceRules(String v) { - if (!v.trim().equals(v)) + if (!Utilities.trimWS(v).equals(v)) return false; boolean lastWasSpace = true; for (char c : v.toCharArray()) { @@ -4048,7 +4033,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat return false; else lastWasSpace = true; - } else if (Character.isWhitespace(c) || c == '\u00A0') + } else if (Utilities.isWhitespace(c)) return false; else lastWasSpace = false; diff --git a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/FHIRPathExpressionFixer.java b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/FHIRPathExpressionFixer.java index f27f696dc..ec1932a85 100644 --- a/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/FHIRPathExpressionFixer.java +++ b/org.hl7.fhir.validation/src/main/java/org/hl7/fhir/validation/instance/utils/FHIRPathExpressionFixer.java @@ -134,6 +134,9 @@ public class FHIRPathExpressionFixer { if (regex.equals("-?(0|[1-9][0-9]{0,17})(\\.[0-9]{1,17})?([eE][+-]?[0-9]{1,9}})?")) { return "-?(0|[1-9][0-9]{0,17})(\\.[0-9]{1,17})?([eE](0|[+\\-]?[1-9][0-9]{0,9}))?"; } + if (regex.equals("[ \\r\\n\\t\\S]+")) { + return "^[\\s\\r\\n\\t\\S]+$"; + } return regex; } diff --git a/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/conversion/tests/UtilitiesXTests.java b/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/conversion/tests/UtilitiesXTests.java index b3f75fae2..6d6ff3d4e 100644 --- a/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/conversion/tests/UtilitiesXTests.java +++ b/org.hl7.fhir.validation/src/test/java/org/hl7/fhir/conversion/tests/UtilitiesXTests.java @@ -45,6 +45,7 @@ import javax.xml.parsers.DocumentBuilderFactory; import org.apache.commons.codec.binary.Base64; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.fhir.ucum.UcumEssenceService; import org.hl7.fhir.convertors.loaders.loaderR5.NullLoaderKnowledgeProviderR5; import org.hl7.fhir.convertors.loaders.loaderR5.R2016MayToR5Loader; @@ -265,7 +266,7 @@ public class UtilitiesXTests { } private static Node skipBlankText(Node node) { - while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && Utilities.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) + while (node != null && (((node.getNodeType() == Node.TEXT_NODE) && StringUtils.isWhitespace(node.getTextContent())) || (node.getNodeType() == Node.COMMENT_NODE))) node = node.getNextSibling(); return node; }