Merge pull request #1247 from pieter-edelman-nictiz/issue-133
#133: Escape just embedded HTML tags, not autolinks in Markdown
This commit is contained in:
commit
28d8491f05
|
@ -1,38 +1,40 @@
|
|||
package org.hl7.fhir.utilities;
|
||||
|
||||
/*
|
||||
Copyright (c) 2011+, HL7, Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of HL7 nor the names of its contributors may be used to
|
||||
endorse or promote products derived from this software without specific
|
||||
prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2011+, HL7, Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of HL7 nor the names of its contributors may be used to
|
||||
endorse or promote products derived from this software without specific
|
||||
prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.commonmark.Extension;
|
||||
import org.commonmark.ext.gfm.tables.TablesExtension;
|
||||
|
@ -157,7 +159,13 @@ public class MarkDownProcessor {
|
|||
mid = -1;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
// Detect autolinks, which should start with a scheme, followed by a colon, followed by some content. Whitespace
|
||||
// is not allowed and for practical purposes, the scheme is considered to consist of lowercase ASCII characters
|
||||
// only.
|
||||
Pattern autolinkPattern = Pattern.compile("<[a-z]+:[^\\s]+>");
|
||||
Matcher autolinkMatcher = autolinkPattern.matcher(s);
|
||||
return autolinkMatcher.find();
|
||||
}
|
||||
|
||||
|
||||
|
@ -193,8 +201,8 @@ public class MarkDownProcessor {
|
|||
* and the way commonmark specifies that < is handled in content. For control reasons, the FHIR specification does
|
||||
* not allow raw html tags in the markdown
|
||||
*
|
||||
* This check finds any raw <[x] where [x] is any alpha character, and prepends \ to it so that it
|
||||
* renders as a < (e.g. gets escaped in the output HTML)
|
||||
* This check finds any raw HTML tag and prepends \ to it so that it renders as a < (i.e. gets escaped in the output
|
||||
* HTML)
|
||||
*
|
||||
* This is public to enable testing (not for direct use otherwise)
|
||||
*
|
||||
|
@ -202,21 +210,16 @@ public class MarkDownProcessor {
|
|||
* @return
|
||||
*/
|
||||
public static String preProcess(String source) {
|
||||
StringBuilder b = new StringBuilder();
|
||||
for (int i = 0; i < source.length(); i++) {
|
||||
char last = i > 0 ? source.charAt(i-1) : 0;
|
||||
char current = source.charAt(i);
|
||||
char next = i < source.length() -1 ? source.charAt(i+1) : 0;
|
||||
if (current == '<' && Character.isAlphabetic(next) && last != '\\') {
|
||||
b.append('\\');
|
||||
b.append(current);
|
||||
} else {
|
||||
b.append(current);
|
||||
}
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
// Escape all unescaped open and closing tags ('<' or '</', followed by an ASCII letter, followed by ASCII
|
||||
// letters, digits and/or hyphens).
|
||||
String processed = source.replaceAll("(?<!\\\\)<(\\/)?([A-Za-z][A-Za-z0-9-]*[\\s>])", "\\\\<$1$2");
|
||||
|
||||
// Escape all other HTML tags: HTML comments, processing instructions, declarations and CDATA sections --
|
||||
// everything starting with '<?' or '<!'.
|
||||
processed = processed.replaceAll("<(!|\\?)", "\\\\<$1");
|
||||
|
||||
return processed;
|
||||
}
|
||||
|
||||
private String processCommonMark(String source) {
|
||||
Set<Extension> extensions = Collections.singleton(TablesExtension.create());
|
||||
|
|
|
@ -25,6 +25,10 @@ class MarkdownTests {
|
|||
testMarkdown("this [is(link)] a test string", false);
|
||||
testMarkdown("this [is](link a test string", false);
|
||||
testMarkdown("this [i]s] (link) a test string", false);
|
||||
testMarkdown("this <https://hl7.org> is a test string", true);
|
||||
testMarkdown("this < https://hl7.org> is a test string", false);
|
||||
testMarkdown("this <mailto:info@hl7.org> is a test string", true);
|
||||
testMarkdown("this <b>is</b> a test string", false);
|
||||
testMarkdown("## heading", true);
|
||||
testMarkdown("# heading", false);
|
||||
testMarkdown("## heading", false);
|
||||
|
|
|
@ -18,6 +18,9 @@ public class MarkdownPreprocessorTesting {
|
|||
public void testHTML() throws IOException {
|
||||
assertEquals(MarkDownProcessor.preProcess("<type>"), "\\<type>");
|
||||
assertEquals(MarkDownProcessor.preProcess("\\<type>"), "\\<type>");
|
||||
assertEquals(MarkDownProcessor.preProcess("</type>"), "\\</type>");
|
||||
assertEquals(MarkDownProcessor.preProcess("<http://hl7.org>"), "<http://hl7.org>");
|
||||
assertEquals(MarkDownProcessor.preProcess("\\<http://hl7.org>"), "\\<http://hl7.org>");
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue