From fa6b87a1bf054b56449da656ddb1483f7c42a100 Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Mon, 22 Nov 2021 10:43:03 +1000 Subject: [PATCH] SECURITY: Strip unrendered unicode bidirectional chars in code blocks (#15032) When rendering the markdown code blocks we replace the offending characters in the output string with spans highlighting a textual representation of the character, along with a title attribute with information about why the character was highlighted. The list of characters stripped by this fix, which are the bidirectional characters considered relevant, are: U+202A U+202B U+202C U+202D U+202E U+2066 U+2067 U+2068 U+2069 --- .../stylesheets/common/base/topic-post.scss | 8 ++ config/locales/server.en.yml | 1 + lib/pretty_text.rb | 32 +++++++ spec/components/pretty_text_spec.rb | 89 +++++++++++++++++++ 4 files changed, 130 insertions(+) diff --git a/app/assets/stylesheets/common/base/topic-post.scss b/app/assets/stylesheets/common/base/topic-post.scss index efb68f7c946..60e6621e50b 100644 --- a/app/assets/stylesheets/common/base/topic-post.scss +++ b/app/assets/stylesheets/common/base/topic-post.scss @@ -827,6 +827,14 @@ pre { background: var(--blend-primary-secondary-5); max-height: 500px; } + + .bidi-warning, + .bidi-warning span { + font-style: normal; + background-color: var(--highlight); + color: var(--danger); + cursor: help; + } } .copy-codeblocks { diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 9cd23f9b6d6..cfc7881fdea 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -717,6 +717,7 @@ en: post: image_placeholder: broken: "This image is broken" + hidden_bidi_character: "Bidirectional characters can change the order that text is rendered. This could be used to obscure malicious code." has_likes: one: "%{count} Like" other: "%{count} Likes" diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index a298ce4f80a..fd0b26e8ba0 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -5,6 +5,19 @@ require 'nokogiri' require 'erb' module PrettyText + DANGEROUS_BIDI_CHARACTERS = [ + "\u202A", + "\u202B", + "\u202C", + "\u202D", + "\u202E", + "\u2066", + "\u2067", + "\u2068", + "\u2069", + ].freeze + DANGEROUS_BIDI_REGEXP = Regexp.new(DANGEROUS_BIDI_CHARACTERS.join("|")).freeze + @mutex = Mutex.new @ctx_init = Mutex.new @@ -278,6 +291,7 @@ module PrettyText add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content add_rel_attributes_to_user_content(doc, add_nofollow) + strip_hidden_unicode_bidirectional_characters(doc) if SiteSetting.enable_mentions add_mentions(doc, user_id: opts[:user_id]) @@ -290,6 +304,24 @@ module PrettyText loofah_fragment.scrub!(scrubber).to_html end + def self.strip_hidden_unicode_bidirectional_characters(doc) + return if !DANGEROUS_BIDI_REGEXP.match?(doc.content) + + doc.css("code,pre").each do |code_tag| + next if !DANGEROUS_BIDI_REGEXP.match?(code_tag.content) + + DANGEROUS_BIDI_CHARACTERS.each do |bidi| + next if !code_tag.content.include?(bidi) + + formatted = "<U+#{bidi.ord.to_s(16).upcase}>" + code_tag.inner_html = code_tag.inner_html.gsub( + bidi, + "#{formatted}" + ) + end + end + end + def self.add_rel_attributes_to_user_content(doc, add_nofollow) allowlist = [] diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index d4b7267291f..1007fb97b5f 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -552,6 +552,95 @@ describe PrettyText do Discourse.redis.flushdb end end + + it "strips out unicode bidirectional (bidi) override characters and replaces with a highlighted span" do + code = <<~MD + X + ```js + var isAdmin = false; + /*‮ begin admin only */⁦ if (isAdmin) ⁩ ⁦ { + console.log("You are an admin."); + /* end admins only ‮*/⁦ } + ``` + MD + cooked = PrettyText.cook(code) + hidden_bidi_title = I18n.t("post.hidden_bidi_character") + + html = <<~HTML +

X

+
var isAdmin = false;
+        /*<U+202E> begin admin only */<U+2066> if (isAdmin) <U+2069> <U+2066> {
+        console.log("You are an admin.");
+        /* end admins only <U+202E>*/<U+2066> }
+        
+ HTML + + expect(cooked).to eq(html.strip) + end + + it "fuzzes all possible dangerous unicode bidirectional (bidi) override characters, making sure they are replaced" do + bad_bidi = [ + "\u202A", + "\u202B", + "\u202C", + "\u202D", + "\u202E", + "\u2066", + "\u2067", + "\u2068", + "\u2069", + ] + bad_bidi.each do |bidi| + code = <<~MD + ``` + #{bidi} + ``` + MD + cooked = PrettyText.cook(code) + formatted_bidi = format("<U+%04X>", bidi.ord) + html = <<~HTML +
#{formatted_bidi}
+          
+ HTML + expect(cooked).to eq(html.strip) + end + end + + it "fuzzes all possible dangerous unicode bidirectional (bidi) override characters in solo code and pre nodes, making sure they are replaced" do + bad_bidi = [ + "\u202A", + "\u202B", + "\u202C", + "\u202D", + "\u202E", + "\u2066", + "\u2067", + "\u2068", + "\u2069", + ] + bad_bidi.each do |bidi| + code = <<~MD + #{bidi} + MD + cooked = PrettyText.cook(code) + formatted_bidi = format("<U+%04X>", bidi.ord) + html = <<~HTML +

#{formatted_bidi}

+ HTML + expect(cooked).to eq(html.strip) + end + bad_bidi.each do |bidi| + code = <<~MD +
#{bidi}
+ MD + cooked = PrettyText.cook(code) + formatted_bidi = format("<U+%04X>", bidi.ord) + html = <<~HTML +
#{formatted_bidi}
+ HTML + expect(cooked).to eq(html.strip) + end + end end describe "rel attributes" do