From 07c95455a47070ed30a7188979c5d0ef0e5208f3 Mon Sep 17 00:00:00 2001 From: Pieter Edelman Date: Tue, 2 May 2023 20:40:49 +0200 Subject: [PATCH 1/2] #133: Escape just embedded HTML tags, not autolinks in Markdown --- .../hl7/fhir/utilities/MarkDownProcessor.java | 37 ++++++++++--------- .../org/hl7/fhir/utilities/MarkdownTests.java | 4 ++ 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/MarkDownProcessor.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/MarkDownProcessor.java index f37763f09..495425b28 100644 --- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/MarkDownProcessor.java +++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/MarkDownProcessor.java @@ -33,6 +33,8 @@ package org.hl7.fhir.utilities; import java.util.Collections; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.commonmark.Extension; import org.commonmark.ext.gfm.tables.TablesExtension; @@ -157,7 +159,13 @@ public class MarkDownProcessor { mid = -1; } } - return false; + + // Detect autolinks, which should start with a scheme, followed by a colon, followed by some content. Whitespace + // is not allowed and for practical purposes, the scheme is considered to consist of lowercase ASCII characters + // only. + Pattern autolinkPattern = Pattern.compile("<[a-z]+:[^\\s]+>"); + Matcher autolinkMatcher = autolinkPattern.matcher(s); + return autolinkMatcher.find(); } @@ -193,8 +201,8 @@ public class MarkDownProcessor { * and the way commonmark specifies that < is handled in content. For control reasons, the FHIR specification does * not allow raw html tags in the markdown * - * This check finds any raw <[x] where [x] is any alpha character, and prepends \ to it so that it - * renders as a < (e.g. gets escaped in the output HTML) + * This check finds any raw html tag and prepends \ to it so that it renders as a < (e.g. gets escaped in the output + * HTML) * * This is public to enable testing (not for direct use otherwise) * @@ -202,21 +210,16 @@ public class MarkDownProcessor { * @return */ public static String preProcess(String source) { - StringBuilder b = new StringBuilder(); - for (int i = 0; i < source.length(); i++) { - char last = i > 0 ? source.charAt(i-1) : 0; - char current = source.charAt(i); - char next = i < source.length() -1 ? source.charAt(i+1) : 0; - if (current == '<' && Character.isAlphabetic(next) && last != '\\') { - b.append('\\'); - b.append(current); - } else { - b.append(current); - } - } - return b.toString(); - } + // Escape all open and closing tags ('<' or '])", "\\\\<$1"); + // Escape all other HTML tags: HTML comments, processing instructions, declarations and CDATA sections -- + // everything starting with ' extensions = Collections.singleton(TablesExtension.create()); diff --git a/org.hl7.fhir.utilities/src/test/java/org/hl7/fhir/utilities/MarkdownTests.java b/org.hl7.fhir.utilities/src/test/java/org/hl7/fhir/utilities/MarkdownTests.java index 030a08076..5f6175cc9 100644 --- a/org.hl7.fhir.utilities/src/test/java/org/hl7/fhir/utilities/MarkdownTests.java +++ b/org.hl7.fhir.utilities/src/test/java/org/hl7/fhir/utilities/MarkdownTests.java @@ -25,6 +25,10 @@ class MarkdownTests { testMarkdown("this [is(link)] a test string", false); testMarkdown("this [is](link a test string", false); testMarkdown("this [i]s] (link) a test string", false); + testMarkdown("this is a test string", true); + testMarkdown("this < https://hl7.org> is a test string", false); + testMarkdown("this is a test string", true); + testMarkdown("this is a test string", false); testMarkdown("## heading", true); testMarkdown("# heading", false); testMarkdown("## heading", false); From 33694d9e2b1e50e9239b286539f2dd3881bcff1a Mon Sep 17 00:00:00 2001 From: Pieter Edelman Date: Tue, 2 May 2023 20:40:49 +0200 Subject: [PATCH 2/2] 133: Improve regex and tests to escape just embedded HTML tags, not autolinks in Markdown --- .../hl7/fhir/utilities/MarkDownProcessor.java | 62 +++++++++---------- .../tests/MarkdownPreprocessorTesting.java | 3 + 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/MarkDownProcessor.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/MarkDownProcessor.java index 495425b28..818960411 100644 --- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/MarkDownProcessor.java +++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/MarkDownProcessor.java @@ -1,33 +1,33 @@ package org.hl7.fhir.utilities; -/* - Copyright (c) 2011+, HL7, Inc. - All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of HL7 nor the names of its contributors may be used to - endorse or promote products derived from this software without specific - prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - - */ +/* + Copyright (c) 2011+, HL7, Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of HL7 nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + */ @@ -210,9 +210,9 @@ public class MarkDownProcessor { * @return */ public static String preProcess(String source) { - // Escape all open and closing tags ('<' or '])", "\\\\<$1"); + // Escape all unescaped open and closing tags ('<' or '])", "\\\\<$1$2"); // Escape all other HTML tags: HTML comments, processing instructions, declarations and CDATA sections -- // everything starting with '"), "\\"); assertEquals(MarkDownProcessor.preProcess("\\"), "\\"); + assertEquals(MarkDownProcessor.preProcess(""), "\\"); + assertEquals(MarkDownProcessor.preProcess(""), ""); + assertEquals(MarkDownProcessor.preProcess("\\"), "\\"); }