# frozen_string_literal: true require "mini_racer" require "nokogiri" require "erb" module PrettyText DANGEROUS_BIDI_CHARACTERS = [ "\u202A", "\u202B", "\u202C", "\u202D", "\u202E", "\u2066", "\u2067", "\u2068", "\u2069", ].freeze DANGEROUS_BIDI_REGEXP = Regexp.new(DANGEROUS_BIDI_CHARACTERS.join("|")).freeze BLOCKED_HOTLINKED_SRC_ATTR = "data-blocked-hotlinked-src" BLOCKED_HOTLINKED_SRCSET_ATTR = "data-blocked-hotlinked-srcset" @mutex = Mutex.new @ctx_init = Mutex.new def self.app_root Rails.root end def self.find_file(root, filename) return filename if File.file?("#{root}#{filename}") es6_name = "#{filename}.js.es6" return es6_name if File.file?("#{root}#{es6_name}") js_name = "#{filename}.js" return js_name if File.file?("#{root}#{js_name}") erb_name = "#{filename}.js.es6.erb" return erb_name if File.file?("#{root}#{erb_name}") erb_name = "#{filename}.js.erb" return erb_name if File.file?("#{root}#{erb_name}") end def self.apply_es6_file(ctx, root_path, part_name) filename = find_file(root_path, part_name) if filename source = File.read("#{root_path}#{filename}") source = ERB.new(source).result(binding) if filename =~ /\.erb\z/ transpiler = DiscourseJsProcessor::Transpiler.new transpiled = transpiler.perform(source, "#{Rails.root}/app/assets/javascripts/", part_name) ctx.eval(transpiled) else # Look for vendored stuff vendor_root = "#{Rails.root}/vendor/assets/javascripts/" filename = find_file(vendor_root, part_name) ctx.eval(File.read("#{vendor_root}#{filename}")) if filename end end def self.ctx_load_directory(ctx, path) root_path = "#{Rails.root}/app/assets/javascripts/" Dir["#{root_path}#{path}/**/*"].sort.each do |f| apply_es6_file(ctx, root_path, f.sub(root_path, "").sub(/\.js(.es6)?\z/, "")) end end def self.create_es6_context ctx = MiniRacer::Context.new(timeout: 25_000, ensure_gc_after_idle: 2000) ctx.eval("window = {}; window.devicePixelRatio = 2;") # hack to make code think stuff is retina ctx.attach("rails.logger.info", proc { |err| Rails.logger.info(err.to_s) }) ctx.attach("rails.logger.warn", proc { |err| Rails.logger.warn(err.to_s) }) ctx.attach("rails.logger.error", proc { |err| Rails.logger.error(err.to_s) }) ctx.eval <<~JS console = { prefix: "[PrettyText] ", log: function(...args){ rails.logger.info(console.prefix + args.join(" ")); }, warn: function(...args){ rails.logger.warn(console.prefix + args.join(" ")); }, error: function(...args){ rails.logger.error(console.prefix + args.join(" ")); } } JS ctx.eval("__PRETTY_TEXT = true") PrettyText::Helpers.instance_methods.each do |method| ctx.attach("__helpers.#{method}", PrettyText::Helpers.method(method)) end root_path = "#{Rails.root}/app/assets/javascripts/" ctx_load(ctx, "#{root_path}/node_modules/loader.js/dist/loader/loader.js") ctx_load(ctx, "#{root_path}/handlebars-shim.js") ctx_load(ctx, "#{root_path}/node_modules/xss/dist/xss.js") ctx.load("#{Rails.root}/lib/pretty_text/vendor-shims.js") ctx_load_directory(ctx, "pretty-text/addon") ctx_load_directory(ctx, "pretty-text/engines/discourse-markdown") ctx_load(ctx, "#{root_path}/node_modules/markdown-it/dist/markdown-it.js") apply_es6_file(ctx, root_path, "discourse-common/addon/lib/get-url") apply_es6_file(ctx, root_path, "discourse-common/addon/lib/object") apply_es6_file(ctx, root_path, "discourse-common/addon/lib/deprecated") apply_es6_file(ctx, root_path, "discourse-common/addon/lib/escape") apply_es6_file(ctx, root_path, "discourse-common/addon/utils/watched-words") apply_es6_file(ctx, root_path, "discourse/app/lib/to-markdown") apply_es6_file(ctx, root_path, "discourse/app/lib/utilities") ctx.load("#{Rails.root}/lib/pretty_text/shims.js") ctx.eval("__setUnicode(#{Emoji.unicode_replacements_json})") to_load = [] DiscoursePluginRegistry.each_globbed_asset do |a| to_load << a if File.file?(a) && a =~ /discourse-markdown/ end to_load.uniq.each do |f| if f =~ %r{\A.+assets/javascripts/} root = Regexp.last_match[0] apply_es6_file(ctx, root, f.sub(root, "").sub(/\.js(\.es6)?\z/, "")) end end DiscoursePluginRegistry.vendored_core_pretty_text.each { |vpt| ctx.eval(File.read(vpt)) } DiscoursePluginRegistry.vendored_pretty_text.each { |vpt| ctx.eval(File.read(vpt)) } ctx end def self.v8 return @ctx if @ctx # ensure we only init one of these @ctx_init.synchronize do return @ctx if @ctx @ctx = create_es6_context end @ctx end def self.reset_translations v8.eval("__resetTranslationTree()") end def self.reset_context @ctx_init.synchronize do @ctx&.dispose @ctx = nil end end # Acceptable options: # # disable_emojis - Disables the emoji markdown engine. # features - A hash where the key is the markdown feature name and the value is a boolean to enable/disable the markdown feature. # The hash is merged into the default features set in pretty-text.js which can be used to add new features or disable existing features. # features_override - An array of markdown feature names to override the default markdown feature set. Currently used by plugins to customize what features should be enabled # when rendering markdown. # markdown_it_rules - An array of markdown rule names which will be applied to the markdown-it engine. Currently used by plugins to customize what markdown-it rules should be # enabled when rendering markdown. # topic_id - Topic id for the post being cooked. # user_id - User id for the post being cooked. # force_quote_link - Always create the link to the quoted topic for [quote] bbcode. Normally this only happens # if the topic_id provided is different from the [quote topic:X]. # hashtag_context - Defaults to "topic-composer" if not supplied. Controls the order of #hashtag lookup results # based on registered hashtag contexts from the `#register_hashtag_search_param` plugin API # method. def self.markdown(text, opts = {}) # we use the exact same markdown converter as the client # TODO: use the same extensions on both client and server (in particular the template for mentions) baked = nil text = text || "" protect do context = v8 custom_emoji = {} Emoji.custom.map { |e| custom_emoji[e.name] = e.url } # note, any additional options added to __optInput here must be # also be added to the buildOptions function in pretty-text.js, # otherwise they will be discarded buffer = +<<~JS __optInput = {}; __optInput.siteSettings = #{SiteSetting.client_settings_json}; #{"__optInput.disableEmojis = true" if opts[:disable_emojis]} __paths = #{paths_json}; __optInput.getURL = __getURL; #{"__optInput.features = #{opts[:features].to_json};" if opts[:features]} #{"__optInput.featuresOverride = #{opts[:features_override].to_json};" if opts[:features_override]} #{"__optInput.markdownItRules = #{opts[:markdown_it_rules].to_json};" if opts[:markdown_it_rules]} __optInput.getCurrentUser = __getCurrentUser; __optInput.lookupAvatar = __lookupAvatar; __optInput.lookupPrimaryUserGroup = __lookupPrimaryUserGroup; __optInput.formatUsername = __formatUsername; __optInput.getTopicInfo = __getTopicInfo; __optInput.categoryHashtagLookup = __categoryLookup; __optInput.hashtagLookup = __hashtagLookup; __optInput.customEmoji = #{custom_emoji.to_json}; __optInput.customEmojiTranslation = #{Plugin::CustomEmoji.translations.to_json}; __optInput.emojiUnicodeReplacer = __emojiUnicodeReplacer; __optInput.lookupUploadUrls = __lookupUploadUrls; __optInput.censoredRegexp = #{WordWatcher.serializable_word_matcher_regexp(:censor).to_json}; __optInput.watchedWordsReplace = #{WordWatcher.word_matcher_regexps(:replace).to_json}; __optInput.watchedWordsLink = #{WordWatcher.word_matcher_regexps(:link).to_json}; __optInput.additionalOptions = #{Site.markdown_additional_options.to_json}; JS buffer << "__optInput.topicId = #{opts[:topic_id].to_i};\n" if opts[:topic_id] if opts[:force_quote_link] buffer << "__optInput.forceQuoteLink = #{opts[:force_quote_link]};\n" end if opts[:user_id] buffer << "__optInput.userId = #{opts[:user_id].to_i};\n" # NOTE: If using this for server-side cooking you will end up # with a Hash once it is passed to a PrettyText::Helper. If # you use that hash to instanciate a User model, you will want to do # user.reload before accessing data on this parsed User, otherwise # AR relations will not be loaded. buffer << "__optInput.currentUser = #{User.find(opts[:user_id]).to_json}\n" end opts[:hashtag_context] = opts[:hashtag_context] || "topic-composer" hashtag_types_as_js = HashtagAutocompleteService .ordered_types_for_context(opts[:hashtag_context]) .map { |t| "'#{t}'" } .join(",") hashtag_icons_as_js = HashtagAutocompleteService.data_source_icons.map { |i| "'#{i}'" }.join(",") buffer << "__optInput.hashtagTypesInPriorityOrder = [#{hashtag_types_as_js}];\n" buffer << "__optInput.hashtagIcons = [#{hashtag_icons_as_js}];\n" buffer << "__textOptions = __buildOptions(__optInput);\n" buffer << ("__pt = new __PrettyText(__textOptions);") # Be careful disabling sanitization. We allow for custom emails buffer << ("__pt.disableSanitizer();") if opts[:sanitize] == false opts = context.eval(buffer) DiscourseEvent.trigger(:markdown_context, context) baked = context.eval("__pt.cook(#{text.inspect})") end baked end def self.paths_json paths = { baseUri: Discourse.base_path, CDN: Rails.configuration.action_controller.asset_host } if SiteSetting.Upload.enable_s3_uploads paths[:S3CDN] = SiteSetting.Upload.s3_cdn_url if SiteSetting.Upload.s3_cdn_url.present? paths[:S3BaseUrl] = Discourse.store.absolute_base_url end paths.to_json end # leaving this here, cause it invokes v8, don't want to implement twice def self.avatar_img(avatar_template, size) protect { v8.eval(<<~JS) } __paths = #{paths_json}; __utils.avatarImg({size: #{size.inspect}, avatarTemplate: #{avatar_template.inspect}}, __getURL); JS end def self.unescape_emoji(title) return title unless SiteSetting.enable_emoji? && title set = SiteSetting.emoji_set.inspect custom = Emoji.custom.map { |e| [e.name, e.url] }.to_h.to_json protect { v8.eval(<<~JS) } __paths = #{paths_json}; __performEmojiUnescape(#{title.inspect}, { getURL: __getURL, emojiSet: #{set}, emojiCDNUrl: "#{SiteSetting.external_emoji_url.blank? ? "" : SiteSetting.external_emoji_url}", customEmoji: #{custom}, enableEmojiShortcuts: #{SiteSetting.enable_emoji_shortcuts}, inlineEmoji: #{SiteSetting.enable_inline_emoji_translation} }); JS end def self.escape_emoji(title) return unless title replace_emoji_shortcuts = SiteSetting.enable_emoji && SiteSetting.enable_emoji_shortcuts protect { v8.eval(<<~JS) } __performEmojiEscape(#{title.inspect}, { emojiShortcuts: #{replace_emoji_shortcuts}, inlineEmoji: #{SiteSetting.enable_inline_emoji_translation} }); JS end def self.cook(text, opts = {}) options = opts.dup working_text = text.dup sanitized = markdown(working_text, options) doc = Nokogiri::HTML5.fragment(sanitized) add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content add_rel_attributes_to_user_content(doc, add_nofollow) strip_hidden_unicode_bidirectional_characters(doc) sanitize_hotlinked_media(doc) add_mentions(doc, user_id: opts[:user_id]) if SiteSetting.enable_mentions scrubber = Loofah::Scrubber.new { |node| node.remove if node.name == "script" } loofah_fragment = Loofah.fragment(doc.to_html) loofah_fragment.scrub!(scrubber).to_html end def self.strip_hidden_unicode_bidirectional_characters(doc) return if !DANGEROUS_BIDI_REGEXP.match?(doc.content) doc .css("code,pre") .each do |code_tag| next if !DANGEROUS_BIDI_REGEXP.match?(code_tag.content) DANGEROUS_BIDI_CHARACTERS.each do |bidi| next if !code_tag.content.include?(bidi) formatted = "<U+#{bidi.ord.to_s(16).upcase}>" code_tag.inner_html = code_tag.inner_html.gsub( bidi, "#{formatted}", ) end end end def self.sanitize_hotlinked_media(doc) return if !SiteSetting.block_hotlinked_media allowed_pattern = allowed_src_pattern doc .css("img[src], source[src], source[srcset], track[src]") .each do |el| if el["src"] && !el["src"].match?(allowed_pattern) el[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR] = el.delete("src") end if el["srcset"] srcs = el["srcset"].split(",").map { |e| e.split(" ", 2)[0].presence } if srcs.any? { |src| !src.match?(allowed_pattern) } el[PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR] = el.delete("srcset") end end end end def self.add_rel_attributes_to_user_content(doc, add_nofollow) allowlist = [] domains = SiteSetting.exclude_rel_nofollow_domains allowlist = domains.split("|") if domains.present? site_uri = nil doc .css("a") .each do |l| href = l["href"].to_s l["rel"] = "noopener" if l["target"] == "_blank" begin uri = URI(UrlHelper.encode_component(href)) site_uri ||= URI(Discourse.base_url) same_domain = !uri.host.present? || uri.host == site_uri.host || uri.host.ends_with?(".#{site_uri.host}") || allowlist.any? { |u| uri.host == u || uri.host.ends_with?(".#{u}") } l["rel"] = "noopener nofollow ugc" if add_nofollow && !same_domain rescue URI::Error # add a nofollow anyway l["rel"] = "noopener nofollow ugc" end end end class DetectedLink < Struct.new(:url, :is_quote) end def self.extract_links(html) links = [] doc = Nokogiri::HTML5.fragment(html) # extract onebox links doc .css("aside.onebox[data-onebox-src]") .each { |onebox| links << DetectedLink.new(onebox["data-onebox-src"], false) } # remove href inside quotes & oneboxes & elided part doc.css("aside.quote a, aside.onebox a, .elided a").remove # remove hotlinked images doc.css("a.onebox > img").each { |img| img.parent.remove } # extract all links doc .css("a") .each do |a| links << DetectedLink.new(a["href"], false) if a["href"].present? && a["href"][0] != "#" end # extract quotes doc .css("aside.quote[data-topic]") .each do |aside| if aside["data-topic"].present? url = +"/t/#{aside["data-topic"]}" url << "/#{aside["data-post"]}" if aside["data-post"].present? links << DetectedLink.new(url, true) end end # extract Youtube links doc .css("div[data-youtube-id]") .each do |div| if div["data-youtube-id"].present? links << DetectedLink.new( "https://www.youtube.com/watch?v=#{div["data-youtube-id"]}", false, ) end end links end def self.extract_mentions(cooked) mentions = cooked .css(".mention, .mention-group") .map do |e| if (name = e.inner_text) name = name[1..-1] name = User.normalize_username(name) name end end mentions.compact! mentions.uniq! mentions end def self.excerpt(html, max_length, options = {}) # TODO: properly fix this HACK in ExcerptParser without introducing XSS doc = Nokogiri::HTML5.fragment(html) DiscourseEvent.trigger(:reduce_excerpt, doc, options) strip_image_wrapping(doc) strip_oneboxed_media(doc) if SiteSetting.enable_experimental_hashtag_autocomplete && options[:plain_hashtags] convert_hashtag_links_to_plaintext(doc) end html = doc.to_html ExcerptParser.get_excerpt(html, max_length, options) end def self.convert_hashtag_links_to_plaintext(doc) doc .css("a.hashtag-cooked") .each { |hashtag| hashtag.replace("##{hashtag.attributes["data-slug"]}") } end def self.strip_links(string) return string if string.blank? # If the user is not basic, strip links from their bio fragment = Nokogiri::HTML5.fragment(string) fragment.css("a").each { |a| a.replace(a.inner_html) } fragment.to_html end def self.make_all_links_absolute(doc) site_uri = nil doc .css("a") .each do |link| href = link["href"].to_s begin uri = URI(href) site_uri ||= URI(Discourse.base_url) unless uri.host.present? || href.start_with?("mailto") link["href"] = "#{site_uri}#{link["href"]}" end rescue URI::Error # leave it end end end def self.strip_image_wrapping(doc) doc.css(".lightbox-wrapper .meta").remove end def self.strip_oneboxed_media(doc) doc.css("audio").remove doc.css(".video-onebox,video").remove end def self.convert_vimeo_iframes(doc) doc .css("iframe[src*='player.vimeo.com']") .each do |iframe| if iframe["data-original-href"].present? vimeo_url = UrlHelper.normalized_encode(iframe["data-original-href"]) else vimeo_id = iframe["src"].split("/").last.sub("?h=", "/") vimeo_url = "https://vimeo.com/#{vimeo_id}" end iframe.replace Nokogiri::HTML5.fragment("

#{vimeo_url}

") end end def self.strip_secure_uploads(doc) # images inside a lightbox or other link doc .css("a[href]") .each do |a| next if !Upload.secure_uploads_url?(a["href"]) non_image_media = %w[video audio].include?(a&.parent&.name) target = non_image_media ? a.parent : a if target.to_s.include?("stripped-secure-view-media") || target.to_s.include?("stripped-secure-view-upload") next end next if a.css("img[src]").empty? && !non_image_media if a.classes.include?("lightbox") img = a.css("img[src]").first srcset = img&.attributes["srcset"]&.value if srcset # if available, use the first image from the srcset here # so we get the optimized image instead of the possibly huge original url = srcset.split(",").first else url = img["src"] end a.add_next_sibling secure_uploads_placeholder( doc, url, width: img["width"], height: img["height"], ) a.remove else width = non_image_media ? nil : a.at_css("img").attr("width") height = non_image_media ? nil : a.at_css("img").attr("height") target.add_next_sibling secure_uploads_placeholder( doc, a["href"], width: width, height: height, ) target.remove end end # images by themselves or inside a onebox doc .css("img[src]") .each do |img| url = if img.parent.classes.include?("aspect-image") && img.attributes["srcset"].present? # we are using the first image from the srcset here so we get the # optimized image instead of the original, because an optimized # image may be used for the onebox thumbnail srcset = img.attributes["srcset"].value srcset.split(",").first else img["src"] end width = img["width"] height = img["height"] onebox_type = nil if img.ancestors.css(".onebox-body").any? if img.classes.include?("onebox-avatar-inline") onebox_type = "avatar-inline" else onebox_type = "thumbnail" end end # we always want this to be tiny and without any special styles if img.classes.include?("site-icon") onebox_type = nil width = 16 height = 16 end if Upload.secure_uploads_url?(url) img.add_next_sibling secure_uploads_placeholder( doc, url, onebox_type: onebox_type, width: width, height: height, ) img.remove end end end def self.secure_uploads_placeholder(doc, url, onebox_type: false, width: nil, height: nil) data_width = width ? "data-width=#{width}" : "" data_height = height ? "data-height=#{height}" : "" data_onebox_type = onebox_type ? "data-onebox-type='#{onebox_type}'" : "" <<~HTML
#{I18n.t("emails.secure_uploads_placeholder")} #{I18n.t("emails.view_redacted_media")}.
HTML end def self.format_for_email(html, post = nil) doc = Nokogiri::HTML5.fragment(html) DiscourseEvent.trigger(:reduce_cooked, doc, post) strip_secure_uploads(doc) if post&.with_secure_uploads? strip_image_wrapping(doc) convert_vimeo_iframes(doc) make_all_links_absolute(doc) doc.to_html end protected class JavaScriptError < StandardError attr_accessor :message, :backtrace def initialize(message, backtrace) @message = message @backtrace = backtrace end end def self.protect rval = nil @mutex.synchronize { rval = yield } rval end def self.ctx_load(ctx, *files) files.each { |file| ctx.load(app_root + file) } end private USER_TYPE ||= "user" GROUP_TYPE ||= "group" GROUP_MENTIONABLE_TYPE ||= "group-mentionable" def self.add_mentions(doc, user_id: nil) elements = doc.css("span.mention") names = elements.map { |element| element.text[1..-1] } mentions = lookup_mentions(names, user_id: user_id) elements.each do |element| name = element.text[1..-1] name.downcase! if type = mentions[name] element.name = "a" element.children = PrettyText::Helpers.format_username(element.children.text) case type when USER_TYPE element["href"] = "#{Discourse.base_path}/u/#{UrlHelper.encode_component(name)}" when GROUP_MENTIONABLE_TYPE element["class"] = "mention-group notify" element["href"] = "#{Discourse.base_path}/groups/#{UrlHelper.encode_component(name)}" when GROUP_TYPE element["class"] = "mention-group" element["href"] = "#{Discourse.base_path}/groups/#{UrlHelper.encode_component(name)}" end end end end def self.lookup_mentions(names, user_id: nil) return {} if names.blank? sql = <<~SQL ( SELECT :user_type AS type, username_lower AS name FROM users WHERE username_lower IN (:names) AND staged = false ) UNION ( SELECT :group_type AS type, lower(name) AS name FROM groups ) UNION ( SELECT :group_mentionable_type AS type, lower(name) AS name FROM groups WHERE lower(name) IN (:names) AND (#{Group.mentionable_sql_clause(include_public: false)}) ) ORDER BY type SQL user = User.find_by(id: user_id) names.each(&:downcase!) results = DB.query( sql, names: names, user_type: USER_TYPE, group_type: GROUP_TYPE, group_mentionable_type: GROUP_MENTIONABLE_TYPE, levels: Group.alias_levels(user), user_id: user_id, ) mentions = {} results.each { |result| mentions[result.name] = result.type } mentions end def self.allowed_src_pattern allowed_src_prefixes = [ Discourse.base_path, Discourse.base_url, GlobalSetting.s3_cdn_url, GlobalSetting.cdn_url, SiteSetting.external_emoji_url.presence, *SiteSetting.block_hotlinked_media_exceptions.split("|"), ] patterns = allowed_src_prefixes.compact.map do |url| pattern = Regexp.escape(url) # If 'https://example.com' is allowed, ensure 'https://example.com.blah.com' is not pattern += '(?:/|\z)' if !pattern.ends_with?("\/") pattern end /\A(data:|#{patterns.join("|")})/ end end