discourse/lib/pretty_text.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

552 lines
16 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
require 'mini_racer'
2013-02-05 14:16:51 -05:00
require 'nokogiri'
require 'erb'
2013-02-05 14:16:51 -05:00
module PrettyText
@mutex = Mutex.new
@ctx_init = Mutex.new
2013-02-05 14:16:51 -05:00
def self.app_root
Rails.root
end
def self.find_file(root, filename)
return filename if File.file?("#{root}#{filename}")
es6_name = "#{filename}.js.es6"
return es6_name if File.file?("#{root}#{es6_name}")
2013-02-05 14:16:51 -05:00
js_name = "#{filename}.js"
return js_name if File.file?("#{root}#{js_name}")
erb_name = "#{filename}.js.es6.erb"
return erb_name if File.file?("#{root}#{erb_name}")
erb_name = "#{filename}.js.erb"
return erb_name if File.file?("#{root}#{erb_name}")
end
def self.apply_es6_file(ctx, root_path, part_name)
filename = find_file(root_path, part_name)
if filename
source = File.read("#{root_path}#{filename}")
source = ERB.new(source).result(binding) if filename =~ /\.erb$/
transpiler = DiscourseJsProcessor::Transpiler.new
transpiled = transpiler.perform(source, "#{Rails.root}/app/assets/javascripts/", part_name)
ctx.eval(transpiled)
else
# Look for vendored stuff
vendor_root = "#{Rails.root}/vendor/assets/javascripts/"
filename = find_file(vendor_root, part_name)
if filename
ctx.eval(File.read("#{vendor_root}#{filename}"))
end
end
2013-02-05 14:16:51 -05:00
end
def self.ctx_load_manifest(ctx, name)
manifest = File.read("#{Rails.root}/app/assets/javascripts/#{name}")
root_path = "#{Rails.root}/app/assets/javascripts/"
manifest.each_line do |l|
2016-08-25 07:45:29 -04:00
l = l.chomp
if l =~ /\/\/= require (\.\/)?(.*)$/
apply_es6_file(ctx, root_path, Regexp.last_match[2])
elsif l =~ /\/\/= require_tree (\.\/)?(.*)$/
path = Regexp.last_match[2]
2016-08-10 13:20:39 -04:00
Dir["#{root_path}/#{path}/**"].sort.each do |f|
apply_es6_file(ctx, root_path, f.sub(root_path, '')[1..-1].sub(/\.js(.es6)?$/, ''))
end
end
end
end
def self.create_es6_context
ctx = MiniRacer::Context.new(timeout: 25000, ensure_gc_after_idle: 2000)
ctx.eval("window = {}; window.devicePixelRatio = 2;") # hack to make code think stuff is retina
if Rails.env.development? || Rails.env.test?
ctx.attach("console.log", proc { |l| p l })
2017-06-23 15:24:11 -04:00
ctx.eval('window.console = console;')
end
ctx.eval("__PRETTY_TEXT = true")
PrettyText::Helpers.instance_methods.each do |method|
ctx.attach("__helpers.#{method}", PrettyText::Helpers.method(method))
end
ctx_load(ctx, "#{Rails.root}/app/assets/javascripts/discourse-loader.js")
ctx_load(ctx, "#{Rails.root}/app/assets/javascripts/handlebars-shim.js")
ctx_load(ctx, "vendor/assets/javascripts/lodash.js")
ctx_load_manifest(ctx, "pretty-text-bundle.js")
ctx_load_manifest(ctx, "markdown-it-bundle.js")
root_path = "#{Rails.root}/app/assets/javascripts/"
2013-02-05 14:16:51 -05:00
apply_es6_file(ctx, root_path, "discourse-common/addon/lib/get-url")
2020-09-02 11:52:54 -04:00
apply_es6_file(ctx, root_path, "discourse-common/addon/lib/object")
apply_es6_file(ctx, root_path, "discourse/app/lib/to-markdown")
apply_es6_file(ctx, root_path, "discourse/app/lib/utilities")
ctx.load("#{Rails.root}/lib/pretty_text/shims.js")
ctx.eval("__setUnicode(#{Emoji.unicode_replacements_json})")
2013-08-08 18:14:12 -04:00
to_load = []
DiscoursePluginRegistry.each_globbed_asset do |a|
to_load << a if File.file?(a) && a =~ /discourse-markdown/
end
to_load.uniq.each do |f|
if f =~ /^.+assets\/javascripts\//
root = Regexp.last_match[0]
apply_es6_file(ctx, root, f.sub(root, '').sub(/\.js(\.es6)?$/, ''))
2013-02-05 14:16:51 -05:00
end
end
DiscoursePluginRegistry.vendored_core_pretty_text.each do |vpt|
ctx.eval(File.read(vpt))
end
DiscoursePluginRegistry.vendored_pretty_text.each do |vpt|
ctx.eval(File.read(vpt))
end
ctx
end
def self.v8
return @ctx if @ctx
# ensure we only init one of these
@ctx_init.synchronize do
return @ctx if @ctx
@ctx = create_es6_context
end
2014-04-14 16:55:57 -04:00
2013-02-05 14:16:51 -05:00
@ctx
end
def self.reset_translations
v8.eval("__resetTranslationTree()")
end
def self.reset_context
@ctx_init.synchronize do
2017-07-20 00:17:45 -04:00
@ctx&.dispose
@ctx = nil
end
end
def self.markdown(text, opts = {})
2013-02-05 14:16:51 -05:00
# we use the exact same markdown converter as the client
2013-02-25 11:42:20 -05:00
# TODO: use the same extensions on both client and server (in particular the template for mentions)
2013-02-05 14:16:51 -05:00
baked = nil
text = text || ""
2013-02-05 14:16:51 -05:00
protect do
context = v8
custom_emoji = {}
Emoji.custom.map { |e| custom_emoji[e.name] = e.url }
buffer = +<<~JS
__optInput = {};
__optInput.siteSettings = #{SiteSetting.client_settings_json};
#{"__optInput.disableEmojis = true" if opts[:disable_emojis]}
__paths = #{paths_json};
__optInput.getURL = __getURL;
#{"__optInput.features = #{opts[:features].to_json};" if opts[:features]}
__optInput.getCurrentUser = __getCurrentUser;
__optInput.lookupAvatar = __lookupAvatar;
__optInput.lookupPrimaryUserGroup = __lookupPrimaryUserGroup;
__optInput.formatUsername = __formatUsername;
__optInput.getTopicInfo = __getTopicInfo;
__optInput.categoryHashtagLookup = __categoryLookup;
__optInput.customEmoji = #{custom_emoji.to_json};
__optInput.customEmojiTranslation = #{Plugin::CustomEmoji.translations.to_json};
2017-06-28 13:47:22 -04:00
__optInput.emojiUnicodeReplacer = __emojiUnicodeReplacer;
__optInput.lookupUploadUrls = __lookupUploadUrls;
__optInput.censoredRegexp = #{WordWatcher.word_matcher_regexp(:censor)&.source.to_json};
JS
if opts[:topicId]
buffer << "__optInput.topicId = #{opts[:topicId].to_i};\n"
end
if opts[:user_id]
buffer << "__optInput.userId = #{opts[:user_id].to_i};\n"
end
buffer << "__textOptions = __buildOptions(__optInput);\n"
buffer << ("__pt = new __PrettyText(__textOptions);")
# Be careful disabling sanitization. We allow for custom emails
if opts[:sanitize] == false
buffer << ('__pt.disableSanitizer();')
end
opts = context.eval(buffer)
DiscourseEvent.trigger(:markdown_context, context)
baked = context.eval("__pt.cook(#{text.inspect})")
2013-02-05 14:16:51 -05:00
end
baked
end
def self.paths_json
paths = {
baseUri: Discourse::base_uri,
CDN: Rails.configuration.action_controller.asset_host,
}
if SiteSetting.Upload.enable_s3_uploads
if SiteSetting.Upload.s3_cdn_url.present?
paths[:S3CDN] = SiteSetting.Upload.s3_cdn_url
end
paths[:S3BaseUrl] = Discourse.store.absolute_base_url
end
paths.to_json
end
2013-02-05 14:16:51 -05:00
# leaving this here, cause it invokes v8, don't want to implement twice
2013-08-13 16:08:29 -04:00
def self.avatar_img(avatar_template, size)
protect do
v8.eval(<<~JS)
__paths = #{paths_json};
__utils.avatarImg({size: #{size.inspect}, avatarTemplate: #{avatar_template.inspect}}, __getURL);
JS
2013-02-05 14:16:51 -05:00
end
end
2015-10-15 03:59:29 -04:00
def self.unescape_emoji(title)
return title unless SiteSetting.enable_emoji? && title
set = SiteSetting.emoji_set.inspect
custom = Emoji.custom.map { |e| [e.name, e.url] }.to_h.to_json
2015-10-15 03:59:29 -04:00
protect do
v8.eval(<<~JS)
__paths = #{paths_json};
__performEmojiUnescape(#{title.inspect}, {
getURL: __getURL,
emojiSet: #{set},
customEmoji: #{custom},
enableEmojiShortcuts: #{SiteSetting.enable_emoji_shortcuts},
inlineEmoji: #{SiteSetting.enable_inline_emoji_translation}
});
JS
2015-10-15 03:59:29 -04:00
end
end
def self.escape_emoji(title)
return unless title
replace_emoji_shortcuts = SiteSetting.enable_emoji && SiteSetting.enable_emoji_shortcuts
protect do
v8.eval(<<~JS)
__performEmojiEscape(#{title.inspect}, {
emojiShortcuts: #{replace_emoji_shortcuts},
inlineEmoji: #{SiteSetting.enable_inline_emoji_translation}
});
JS
end
end
2013-02-05 14:16:51 -05:00
def self.cook(text, opts = {})
options = opts.dup
2013-02-05 14:16:51 -05:00
# we have a minor inconsistency
options[:topicId] = opts[:topic_id]
working_text = text.dup
sanitized = markdown(working_text, options)
doc = Nokogiri::HTML5.fragment(sanitized)
if !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
add_rel_nofollow_to_user_content(doc)
end
if SiteSetting.enable_mentions
add_mentions(doc, user_id: opts[:user_id])
end
scrubber = Loofah::Scrubber.new do |node|
node.remove if node.name == 'script'
end
loofah_fragment = Loofah.fragment(doc.to_html)
loofah_fragment.scrub!(scrubber).to_html
end
def self.add_rel_nofollow_to_user_content(doc)
allowlist = []
2013-02-11 03:01:33 -05:00
domains = SiteSetting.exclude_rel_nofollow_domains
allowlist = domains.split('|') if domains.present?
2013-02-11 03:01:33 -05:00
site_uri = nil
doc.css("a").each do |l|
href = l["href"].to_s
2013-02-25 11:42:20 -05:00
begin
uri = URI(UrlHelper.encode_component(href))
site_uri ||= URI(Discourse.base_url)
2013-02-25 11:42:20 -05:00
2013-11-05 13:04:47 -05:00
if !uri.host.present? ||
uri.host == site_uri.host ||
uri.host.ends_with?(".#{site_uri.host}") ||
allowlist.any? { |u| uri.host == u || uri.host.ends_with?(".#{u}") }
# we are good no need for nofollow
l.remove_attribute("rel")
else
2016-11-20 07:49:14 -05:00
l["rel"] = "nofollow noopener"
end
rescue URI::Error
2013-02-25 11:42:20 -05:00
# add a nofollow anyway
2016-11-20 07:49:14 -05:00
l["rel"] = "nofollow noopener"
end
end
2013-02-05 14:16:51 -05:00
end
class DetectedLink < Struct.new(:url, :is_quote); end
2014-07-11 00:17:01 -04:00
2013-02-05 14:16:51 -05:00
def self.extract_links(html)
links = []
doc = Nokogiri::HTML5.fragment(html)
# remove href inside quotes & elided part
doc.css("aside.quote a, .elided a").each { |a| a["href"] = "" }
2014-07-11 00:17:01 -04:00
# extract all links
doc.css("a").each do |a|
if a["href"].present? && a["href"][0] != "#"
links << DetectedLink.new(a["href"], false)
2014-07-11 00:17:01 -04:00
end
end
2013-02-25 11:42:20 -05:00
# extract quotes
doc.css("aside.quote[data-topic]").each do |aside|
if aside["data-topic"].present?
url = +"/t/topic/#{aside["data-topic"]}"
url << "/#{aside["data-post"]}" if aside["data-post"].present?
links << DetectedLink.new(url, true)
end
end
# extract Youtube links
doc.css("div[data-youtube-id]").each do |div|
if div["data-youtube-id"].present?
links << DetectedLink.new("https://www.youtube.com/watch?v=#{div['data-youtube-id']}", false)
end
end
2013-02-05 14:16:51 -05:00
links
end
2013-05-27 19:48:47 -04:00
def self.excerpt(html, max_length, options = {})
# TODO: properly fix this HACK in ExcerptParser without introducing XSS
doc = Nokogiri::HTML5.fragment(html)
DiscourseEvent.trigger(:reduce_excerpt, doc, options)
strip_image_wrapping(doc)
strip_oneboxed_media(doc)
html = doc.to_html
2013-05-27 19:48:47 -04:00
ExcerptParser.get_excerpt(html, max_length, options)
end
2013-02-05 14:16:51 -05:00
def self.strip_links(string)
return string if string.blank?
# If the user is not basic, strip links from their bio
fragment = Nokogiri::HTML5.fragment(string)
fragment.css('a').each { |a| a.replace(a.inner_html) }
fragment.to_html
end
def self.make_all_links_absolute(doc)
site_uri = nil
doc.css("a").each do |link|
href = link["href"].to_s
begin
uri = URI(href)
site_uri ||= URI(Discourse.base_url)
2018-06-08 13:11:52 -04:00
unless uri.host.present? || href.start_with?('mailto')
link["href"] = "#{site_uri}#{link['href']}"
2018-06-08 13:56:20 -04:00
end
rescue URI::Error
# leave it
end
end
end
def self.strip_image_wrapping(doc)
doc.css(".lightbox-wrapper .meta").remove
end
def self.strip_oneboxed_media(doc)
doc.css("audio").remove
doc.css(".video-onebox,video").remove
end
def self.convert_vimeo_iframes(doc)
doc.css("iframe[src*='player.vimeo.com']").each do |iframe|
if iframe["data-original-href"].present?
2019-05-09 11:37:55 -04:00
vimeo_url = UrlHelper.escape_uri(iframe["data-original-href"])
else
vimeo_id = iframe['src'].split('/').last
vimeo_url = "https://vimeo.com/#{vimeo_id}"
end
iframe.replace "<p><a href='#{vimeo_url}'>#{vimeo_url}</a></p>"
end
end
def self.strip_secure_media(doc)
doc.css("a[href]").each do |a|
if Upload.secure_media_url?(a["href"])
target = %w(video audio).include?(a&.parent&.name) ? a.parent : a
FEATURE: Allow email image embed with secure media (#10563) This PR introduces a few important changes to secure media redaction in emails. First of all, two new site settings have been introduced: * `secure_media_allow_embed_images_in_emails`: If enabled we will embed secure images in emails instead of redacting them. * `secure_media_max_email_embed_image_size_kb`: The cap to the size of the secure image we will embed, defaulting to 1mb, so the email does not become too big. Max is 10mb. Works in tandem with `email_total_attachment_size_limit_kb`. `Email::Sender` will now attach images to the email based on these settings. The sender will also call `inline_secure_images` in `Email::Styles` after secure media is redacted and attachments are added to replace redaction messages with attached images. I went with attachment and `cid` URLs because base64 image support is _still_ flaky in email clients. All redaction of secure media is now handled in `Email::Styles` and calls out to `PrettyText.strip_secure_media` to do the actual stripping and replacing with placeholders. `app/mailers/group_smtp_mailer.rb` and `app/mailers/user_notifications.rb` no longer do any stripping because they are earlier in the pipeline than `Email::Styles`. Finally the redaction notice has been restyled and includes a link to the media that the user can click, which will show it to them if they have the necessary permissions. ![image](https://user-images.githubusercontent.com/920448/92341012-b9a2c380-f0ff-11ea-860e-b376b4528357.png)
2020-09-09 19:50:16 -04:00
next if target.to_s.include?("stripped-secure-view-media")
target.add_next_sibling secure_media_placeholder(doc, a['href'])
target.remove
end
end
FEATURE: Allow email image embed with secure media (#10563) This PR introduces a few important changes to secure media redaction in emails. First of all, two new site settings have been introduced: * `secure_media_allow_embed_images_in_emails`: If enabled we will embed secure images in emails instead of redacting them. * `secure_media_max_email_embed_image_size_kb`: The cap to the size of the secure image we will embed, defaulting to 1mb, so the email does not become too big. Max is 10mb. Works in tandem with `email_total_attachment_size_limit_kb`. `Email::Sender` will now attach images to the email based on these settings. The sender will also call `inline_secure_images` in `Email::Styles` after secure media is redacted and attachments are added to replace redaction messages with attached images. I went with attachment and `cid` URLs because base64 image support is _still_ flaky in email clients. All redaction of secure media is now handled in `Email::Styles` and calls out to `PrettyText.strip_secure_media` to do the actual stripping and replacing with placeholders. `app/mailers/group_smtp_mailer.rb` and `app/mailers/user_notifications.rb` no longer do any stripping because they are earlier in the pipeline than `Email::Styles`. Finally the redaction notice has been restyled and includes a link to the media that the user can click, which will show it to them if they have the necessary permissions. ![image](https://user-images.githubusercontent.com/920448/92341012-b9a2c380-f0ff-11ea-860e-b376b4528357.png)
2020-09-09 19:50:16 -04:00
doc.css('img[src]').each do |img|
if Upload.secure_media_url?(img['src'])
img.add_next_sibling secure_media_placeholder(doc, img['src'])
img.remove
end
end
end
def self.secure_media_placeholder(doc, url)
<<~HTML
<div class="secure-media-notice" data-stripped-secure-media="#{url}">
#{I18n.t('emails.secure_media_placeholder')} <a class='stripped-secure-view-media' href="#{url}">#{I18n.t("emails.view_redacted_media")}</a>.
</div>
HTML
end
def self.format_for_email(html, post = nil)
doc = Nokogiri::HTML5.fragment(html)
DiscourseEvent.trigger(:reduce_cooked, doc, post)
strip_secure_media(doc) if post&.with_secure_media?
strip_image_wrapping(doc)
convert_vimeo_iframes(doc)
make_all_links_absolute(doc)
doc.to_html
end
2013-05-27 19:48:47 -04:00
protected
2013-02-05 14:16:51 -05:00
class JavaScriptError < StandardError
attr_accessor :message, :backtrace
def initialize(message, backtrace)
@message = message
@backtrace = backtrace
end
end
def self.protect
rval = nil
@mutex.synchronize do
rval = yield
end
rval
end
def self.ctx_load(ctx, *files)
2013-05-27 19:48:47 -04:00
files.each do |file|
ctx.load(app_root + file)
2013-02-05 14:16:51 -05:00
end
end
private
USER_TYPE ||= 'user'
GROUP_TYPE ||= 'group'
GROUP_MENTIONABLE_TYPE ||= 'group-mentionable'
def self.add_mentions(doc, user_id: nil)
elements = doc.css("span.mention")
2018-11-22 19:31:52 -05:00
names = elements.map { |element| element.text[1..-1] }
mentions = lookup_mentions(names, user_id: user_id)
elements.each do |element|
name = element.text[1..-1]
name.downcase!
if type = mentions[name]
element.name = 'a'
element.children = PrettyText::Helpers.format_username(
element.children.text
)
case type
when USER_TYPE
element['href'] = "#{Discourse::base_uri}/u/#{UrlHelper.encode_component(name)}"
when GROUP_MENTIONABLE_TYPE
element['class'] = 'mention-group notify'
element['href'] = "#{Discourse::base_uri}/groups/#{UrlHelper.encode_component(name)}"
when GROUP_TYPE
element['class'] = 'mention-group'
element['href'] = "#{Discourse::base_uri}/groups/#{UrlHelper.encode_component(name)}"
end
end
end
end
def self.lookup_mentions(names, user_id: nil)
return {} if names.blank?
sql = <<~SQL
(
SELECT
:user_type AS type,
username_lower AS name
FROM users
WHERE username_lower IN (:names) AND staged = false
)
UNION
(
SELECT
:group_type AS type,
lower(name) AS name
FROM groups
)
UNION
(
SELECT
:group_mentionable_type AS type,
lower(name) AS name
FROM groups
WHERE lower(name) IN (:names) AND (#{Group.mentionable_sql_clause(include_public: false)})
)
ORDER BY type
SQL
user = User.find_by(id: user_id)
names.each(&:downcase!)
results = DB.query(sql,
names: names,
user_type: USER_TYPE,
group_type: GROUP_TYPE,
group_mentionable_type: GROUP_MENTIONABLE_TYPE,
levels: Group.alias_levels(user),
user_id: user_id
)
mentions = {}
results.each { |result| mentions[result.name] = result.type }
mentions
end
2013-02-05 14:16:51 -05:00
end