#133: Escape just embedded HTML tags, not autolinks in Markdown
This commit is contained in:
parent
8ba62413ff
commit
07c95455a4
|
@ -33,6 +33,8 @@ package org.hl7.fhir.utilities;
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.commonmark.Extension;
|
import org.commonmark.Extension;
|
||||||
import org.commonmark.ext.gfm.tables.TablesExtension;
|
import org.commonmark.ext.gfm.tables.TablesExtension;
|
||||||
|
@ -157,7 +159,13 @@ public class MarkDownProcessor {
|
||||||
mid = -1;
|
mid = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
|
// Detect autolinks, which should start with a scheme, followed by a colon, followed by some content. Whitespace
|
||||||
|
// is not allowed and for practical purposes, the scheme is considered to consist of lowercase ASCII characters
|
||||||
|
// only.
|
||||||
|
Pattern autolinkPattern = Pattern.compile("<[a-z]+:[^\\s]+>");
|
||||||
|
Matcher autolinkMatcher = autolinkPattern.matcher(s);
|
||||||
|
return autolinkMatcher.find();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -193,8 +201,8 @@ public class MarkDownProcessor {
|
||||||
* and the way commonmark specifies that < is handled in content. For control reasons, the FHIR specification does
|
* and the way commonmark specifies that < is handled in content. For control reasons, the FHIR specification does
|
||||||
* not allow raw html tags in the markdown
|
* not allow raw html tags in the markdown
|
||||||
*
|
*
|
||||||
* This check finds any raw <[x] where [x] is any alpha character, and prepends \ to it so that it
|
* This check finds any raw html tag and prepends \ to it so that it renders as a < (e.g. gets escaped in the output
|
||||||
* renders as a < (e.g. gets escaped in the output HTML)
|
* HTML)
|
||||||
*
|
*
|
||||||
* This is public to enable testing (not for direct use otherwise)
|
* This is public to enable testing (not for direct use otherwise)
|
||||||
*
|
*
|
||||||
|
@ -202,21 +210,16 @@ public class MarkDownProcessor {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public static String preProcess(String source) {
|
public static String preProcess(String source) {
|
||||||
StringBuilder b = new StringBuilder();
|
// Escape all open and closing tags ('<' or '</', followed by an ASCII letter, followed by ASCII letters, digits
|
||||||
for (int i = 0; i < source.length(); i++) {
|
// and/or hyphens).
|
||||||
char last = i > 0 ? source.charAt(i-1) : 0;
|
String processed = source.replaceAll("</?([A-Za-z][A-Za-z0-9-]*[\\s>])", "\\\\<$1");
|
||||||
char current = source.charAt(i);
|
|
||||||
char next = i < source.length() -1 ? source.charAt(i+1) : 0;
|
|
||||||
if (current == '<' && Character.isAlphabetic(next) && last != '\\') {
|
|
||||||
b.append('\\');
|
|
||||||
b.append(current);
|
|
||||||
} else {
|
|
||||||
b.append(current);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return b.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Escape all other HTML tags: HTML comments, processing instructions, declarations and CDATA sections --
|
||||||
|
// everything starting with '<?' or '<!'.
|
||||||
|
processed = processed.replaceAll("<(!|\\?)", "\\\\<$1");
|
||||||
|
|
||||||
|
return processed;
|
||||||
|
}
|
||||||
|
|
||||||
private String processCommonMark(String source) {
|
private String processCommonMark(String source) {
|
||||||
Set<Extension> extensions = Collections.singleton(TablesExtension.create());
|
Set<Extension> extensions = Collections.singleton(TablesExtension.create());
|
||||||
|
|
|
@ -25,6 +25,10 @@ class MarkdownTests {
|
||||||
testMarkdown("this [is(link)] a test string", false);
|
testMarkdown("this [is(link)] a test string", false);
|
||||||
testMarkdown("this [is](link a test string", false);
|
testMarkdown("this [is](link a test string", false);
|
||||||
testMarkdown("this [i]s] (link) a test string", false);
|
testMarkdown("this [i]s] (link) a test string", false);
|
||||||
|
testMarkdown("this <https://hl7.org> is a test string", true);
|
||||||
|
testMarkdown("this < https://hl7.org> is a test string", false);
|
||||||
|
testMarkdown("this <mailto:info@hl7.org> is a test string", true);
|
||||||
|
testMarkdown("this <b>is</b> a test string", false);
|
||||||
testMarkdown("## heading", true);
|
testMarkdown("## heading", true);
|
||||||
testMarkdown("# heading", false);
|
testMarkdown("# heading", false);
|
||||||
testMarkdown("## heading", false);
|
testMarkdown("## heading", false);
|
||||||
|
|
Loading…
Reference in New Issue