# frozen_string_literal: true

# Post processing that we can do after a post has already been cooked.
# For example, inserting the onebox content, or image sizes/thumbnails.

require_dependency 'url_helper'
require_dependency 'pretty_text'
require_dependency 'quote_comparer'

class CookedPostProcessor
  INLINE_ONEBOX_LOADING_CSS_CLASS = "inline-onebox-loading"
  INLINE_ONEBOX_CSS_CLASS = "inline-onebox"
  LIGHTBOX_WRAPPER_CSS_CLASS = "lightbox-wrapper"
  LOADING_SIZE = 10
  LOADING_COLORS = 32

  attr_reader :cooking_options, :doc

  # Re-cooks the post's raw content and parses the result into an HTML
  # fragment (@doc) that every processing step below mutates in place.
  #
  # post - the post to process
  # opts - options hash; the keys read by this class are :cooking_options,
  #        :image_sizes, :invalidate_oneboxes and :disable_loading_image
  def initialize(post, opts = {})
    @dirty = false
    @opts = opts
    @post = post
    @previous_cooked = (@post.cooked || "").dup
    # NOTE: we re-cook the post here in order to prevent timing issues with edits
    # cf. https://meta.discourse.org/t/edit-of-rebaked-post-doesnt-show-in-html-only-in-raw/33815/6
    @cooking_options = post.cooking_options || opts[:cooking_options] || {}
    @cooking_options[:topic_id] = post.topic_id
    @cooking_options = @cooking_options.symbolize_keys

    @doc = Nokogiri::HTML::fragment(post.cook(post.raw, @cooking_options))
    @has_oneboxes = post.post_analyzer.found_oneboxes?
    @size_cache = {}

    @disable_loading_image = !!opts[:disable_loading_image]
  end

  # Runs every post-processing step, in order, while holding a per-post
  # distributed mutex. Always returns nil from the synchronized block.
  #
  # bypass_bump - forwarded to the hotlinked-images job
  # new_post    - when true, a full quote of the previous post may be removed
  def post_process(bypass_bump: false, new_post: false)
    DistributedMutex.synchronize("post_process_#{@post.id}", validity: 10.minutes) do
      DiscourseEvent.trigger(:before_post_process_cooked, @doc, @post)
      remove_full_quote_on_direct_reply if new_post
      post_process_oneboxes
      post_process_images
      post_process_quotes
      optimize_urls
      remove_user_ids
      update_post_image
      enforce_nofollow
      pull_hotlinked_images(bypass_bump)
      grant_badges
      @post.link_post_uploads(fragments: @doc)
      DiscourseEvent.trigger(:post_process_cooked, @doc, @post)
      nil
    end
  end

  # True when the document contains at least one emoji image outside a quote.
  def has_emoji?
    (@doc.css("img.emoji") - @doc.css(".quote img")).size > 0
  end

  # Grants the "first emoji", "first onebox" and "first reply by email" badges
  # when applicable. Does nothing if an anonymous Guardian cannot see the post.
  def grant_badges
    return unless Guardian.new.can_see?(@post)

    BadgeGranter.grant(Badge.find(Badge::FirstEmoji), @post.user, post_id: @post.id) if has_emoji?
    BadgeGranter.grant(Badge.find(Badge::FirstOnebox), @post.user, post_id: @post.id) if @has_oneboxes
    BadgeGranter.grant(Badge.find(Badge::FirstReplyByEmail), @post.user, post_id: @post.id) if @post.is_reply_by_email?
  end

  # Adds the "quote-modified" class to every quote whose text QuoteComparer
  # reports as modified relative to the quoted post.
  def post_process_quotes
    @doc.css("aside.quote").each do |q|
      post_number = q['data-post']
      topic_id = q['data-topic']

      # quotes without both attributes cannot be compared
      next unless topic_id && post_number

      comparer = QuoteComparer.new(
        topic_id.to_i,
        post_number.to_i,
        q.css('blockquote').text
      )

      if comparer.modified?
        q['class'] = ((q['class'] || '') + " quote-modified").strip
      end
    end
  end

  # When the post consists of exactly one quote whose text equals the whole of
  # the previous visible regular post, strips the leading [quote] block from
  # the raw and saves the result as a system revision.
  def remove_full_quote_on_direct_reply
    return if !SiteSetting.remove_full_quote
    return if @post.post_number == 1
    return if @doc.css("aside.quote").size != 1

    previous = Post
      .where("post_number < ? AND topic_id = ? AND post_type = ? AND NOT hidden", @post.post_number, @post.topic_id, Post.types[:regular])
      .order("post_number DESC")
      .limit(1)
      .pluck(:cooked)
      .first

    return if previous.blank?

    previous_text = Nokogiri::HTML::fragment(previous).text.strip
    quoted_text = @doc.css("aside.quote:first-child blockquote").first&.text&.strip || ""

    # compare with runs of whitespace collapsed to their first character
    return if previous_text.gsub(/(\s){2,}/, '\1') != quoted_text.gsub(/(\s){2,}/, '\1')

    quote_regexp = /\A\s*\[quote.+?\[\/quote\]/im
    quoteless_raw = @post.raw.sub(quote_regexp, "").strip

    # nothing was stripped: the raw does not start with a quote block
    return if @post.raw.strip == quoteless_raw

    PostRevisor.new(@post).revise!(
      Discourse.system_user,
      {
        raw: quoteless_raw,
        edit_reason: I18n.t(:removed_direct_reply_full_quotes)
      },
      skip_validations: true,
      bypass_bump: true
    )
  end

  # Replaces +img+ with a placeholder when its scheme-less src is listed as a
  # large or broken image. Returns true when a placeholder was inserted,
  # false otherwise.
  def add_image_placeholder!(img)
    src = img["src"].sub(/^https?:/i, "")

    if large_images.include?(src)
      return add_large_image_placeholder!(img)
    elsif broken_images.include?(src)
      return add_broken_image_placeholder!(img)
    end

    false
  end

  # Swaps +img+ for a "large-image-placeholder" div containing an external
  # link to the image and a "too large" help text. If the image was already
  # inside a link, that link is either dropped (same href) or kept next to
  # the new one. Always returns true.
  def add_large_image_placeholder!(img)
    url = img["src"]

    # must be computed before the image is moved into the placeholder
    is_hyperlinked = is_a_hyperlink?(img)

    placeholder = create_node("div", "large-image-placeholder")
    img.add_next_sibling(placeholder)
    placeholder.add_child(img)

    a = create_link_node(nil, url, true)
    img.add_next_sibling(a)

    span = create_span_node("url", url)
    a.add_child(span)
    span.add_previous_sibling(create_icon_node("far-image"))
    span.add_next_sibling(create_span_node("help", I18n.t("upload.placeholders.too_large", max_size_kb: SiteSetting.max_image_size_kb)))

    # Only if the image is already linked
    if is_hyperlinked
      parent = placeholder.parent
      # move the placeholder out of (right after) its current parent
      parent.add_next_sibling(placeholder)

      if parent.name == 'a' && parent["href"].present?
        if url == parent["href"]
          parent.remove
        else
          parent["class"] = "link"
          a.add_previous_sibling(parent)

          lspan = create_span_node("url", parent["href"])
          parent.add_child(lspan)
          lspan.add_previous_sibling(create_icon_node("link"))
        end
      end
    end

    img.remove
    true
  end

  # Turns +img+ into a <span class="broken-image"> with a localized title and
  # strips its src/width/height attributes. Always returns true.
  def add_broken_image_placeholder!(img)
    img.name = "span"
    img.set_attribute("class", "broken-image")
    img.set_attribute("title", I18n.t("post.image_placeholder.broken"))
    # NOTE(review): an empty string is appended here, which adds no children.
    # If inner markup (e.g. an icon) was intended it is not present in this
    # file -- confirm.
    img << ""
    img.remove_attribute("src")
    img.remove_attribute("width")
    img.remove_attribute("height")
    true
  end

  # Memoized list parsed from the post's LARGE_IMAGES custom field
  # ([] when the field is blank or not valid JSON).
  def large_images
    @large_images ||= image_list_from_custom_field(Post::LARGE_IMAGES)
  end

  # Memoized list parsed from the post's BROKEN_IMAGES custom field
  # ([] when the field is blank or not valid JSON).
  def broken_images
    @broken_images ||= image_list_from_custom_field(Post::BROKEN_IMAGES)
  end

  # Memoized @post.downloaded_images; indexed by scheme-less src in
  # post_process_oneboxes to look up an upload id.
  def downloaded_images
    @downloaded_images ||= @post.downloaded_images
  end

  # Images eligible for resizing/lightboxing.
  def extract_images
    # all images with a src attribute
    @doc.css("img[src]") -
    # minus data images
    @doc.css("img[src^='data']") -
    # minus emojis
    @doc.css("img.emoji") -
    # minus oneboxed images
    oneboxed_images -
    # minus images inside quotes
    @doc.css(".quote img")
  end

  # Images eligible to become the post's (and topic's) image_url.
  def extract_images_for_post
    # all images with a src attribute
    @doc.css("img[src]") -
    # minus emojis
    @doc.css("img.emoji") -
    # minus images inside quotes
    @doc.css(".quote img")
  end

  # All images that belong to a onebox.
  def oneboxed_images
    @doc.css(".onebox-body img, .onebox img, img.onebox")
  end

  # Sets the width/height attributes of +img+ to the result of
  # ImageSizer.resize applied to the best size information available.
  def limit_size!(img)
    # retrieve the size from
    #  1) the width/height attributes
    #  2) the dimension from the preview (image_sizes)
    #  3) the dimension of the original image (HTTP request)
    w, h = get_size_from_attributes(img) ||
           get_size_from_image_sizes(img["src"], @opts[:image_sizes]) ||
           get_size(img["src"])

    # limit the size of the thumbnail
    img["width"], img["height"] = ImageSizer.resize(w, h)
  end

  # Returns [width, height] from the attributes of +img+. When only one of
  # the two is set, the other is scaled from the original image's size.
  # Returns nil when neither attribute is positive or the original size is
  # unavailable.
  def get_size_from_attributes(img)
    w, h = img["width"].to_i, img["height"].to_i
    return [w, h] unless w <= 0 || h <= 0

    # if only width or height are specified attempt to scale image
    if w > 0 || h > 0
      w = w.to_f
      h = h.to_f

      return unless original_image_size = get_size(img["src"])
      original_width, original_height = original_image_size.map(&:to_f)

      # a zero dimension would produce an infinite/NaN ratio and make
      # Float#floor raise FloatDomainError
      return if original_width <= 0 || original_height <= 0

      if w > 0
        ratio = w / original_width
        [w.floor, (original_height * ratio).floor]
      else
        ratio = h / original_height
        [(original_width * ratio).floor, h.floor]
      end
    end
  end

  # Looks +src+ up in +image_sizes+, iterated as [url, size] pairs, and
  # returns [width, height] of the first entry whose url contains +src+ and
  # whose dimensions are both positive. Returns nil when nothing matches.
  def get_size_from_image_sizes(src, image_sizes)
    return unless image_sizes.present?

    image_sizes.each do |image_size|
      url, size = image_size[0], image_size[1]
      if url && src && url.include?(src) &&
         size && size["width"].to_i > 0 && size["height"].to_i > 0
        return [size["width"], size["height"]]
      end
    end

    # Without this, the value of `each` (the collection itself) would be
    # returned and mistaken for a [width, height] pair by limit_size!
    nil
  end

  # Pre-populates the size cache so get_size(url) answers without a request.
  def add_to_size_cache(url, w, h)
    @size_cache[url] = [w, h]
  end

  # Returns the size of the image at +url+ as reported by FastImage.size,
  # memoized per url. Returns nil when the url is missing or not http(s),
  # when crawling is disabled for a non-uploaded image, or when one of the
  # rescued errors is raised.
  def get_size(url)
    return @size_cache[url] if @size_cache.has_key?(url)

    absolute_url = url
    # site-relative path ("/foo", but not protocol-relative "//foo")
    absolute_url = Discourse.base_url_no_prefix + absolute_url if absolute_url =~ /^\/[^\/]/

    return unless absolute_url

    # FastImage fails when there's no scheme
    absolute_url = SiteSetting.scheme + ":" + absolute_url if absolute_url.start_with?("//")

    return unless is_valid_image_url?(absolute_url)

    # we can *always* crawl our own images
    return unless SiteSetting.crawl_images? || Discourse.store.has_been_uploaded?(url)

    @size_cache[url] = FastImage.size(absolute_url)
  rescue Zlib::BufError, URI::Error, OpenSSL::SSL::SSLError
    # FastImage.size raises BufError for some gifs, leave it.
  end

  # True when +url+ parses and has an http or https scheme; nil when it
  # cannot be parsed.
  def is_valid_image_url?(url)
    uri = URI.parse(url)
    %w(http https).include? uri.scheme
  rescue URI::Error
  end

  # Wraps +img+ in a lightbox link when the original image is larger than
  # both its displayed size and the site's maximum image dimensions, creating
  # the thumbnails needed when the image is a known upload.
  def convert_to_link!(img)
    src = img["src"]
    return if src.blank? || is_a_hyperlink?(img) || is_svg?(img)

    width, height = img["width"].to_i, img["height"].to_i
    # TODO: store original dimensions in db
    original_width, original_height = (get_size(src) || [0, 0]).map(&:to_i)

    # can't reach the image...
    if original_width == 0 || original_height == 0
      Rails.logger.info "Can't reach '#{src}' to get its dimension."
      return
    end

    return if original_width <= width && original_height <= height
    return if original_width <= SiteSetting.max_image_width && original_height <= SiteSetting.max_image_height

    # crop only when enabled and the image's width/height ratio is below the
    # configured minimum
    crop = SiteSetting.min_ratio_to_crop > 0
    crop &&= original_width.to_f / original_height.to_f < SiteSetting.min_ratio_to_crop

    if crop
      width, height = ImageSizer.crop(original_width, original_height)
      img["width"] = width
      img["height"] = height
    end

    if upload = Upload.get_from_url(src)
      upload.create_thumbnail!(width, height, crop: crop)

      each_responsive_ratio do |ratio|
        resized_w = (width * ratio).to_i
        resized_h = (height * ratio).to_i

        # skip sizes wider than the upload itself
        if upload.width && resized_w <= upload.width
          upload.create_thumbnail!(resized_w, resized_h, crop: crop)
        end
      end

      unless @disable_loading_image
        upload.create_thumbnail!(LOADING_SIZE, LOADING_SIZE, format: 'png', colors: LOADING_COLORS)
      end
    end

    add_lightbox!(img, original_width, original_height, upload, cropped: crop)
  end

  # The LOADING_SIZE x LOADING_SIZE thumbnail of +upload+, if any.
  def loading_image(upload)
    upload.thumbnail(LOADING_SIZE, LOADING_SIZE)
  end

  # True when +img+ has an <a> element among its ancestors.
  def is_a_hyperlink?(img)
    parent = img.parent
    while parent
      return true if parent.name == "a"
      # Stop at an ancestor that cannot be walked further; previously
      # `parent` was left unchanged in that case and the loop never ended.
      break unless parent.respond_to?(:parent)
      parent = parent.parent
    end
    false
  end

  # Yields, in ascending order, every ratio greater than 1 found in the
  # pipe-separated responsive_post_image_sizes site setting.
  def each_responsive_ratio
    SiteSetting
      .responsive_post_image_sizes
      .split('|')
      .map(&:to_f)
      .sort
      .each { |r| yield r if r > 1 }
  end

  # Wraps +img+ in a lightbox: a wrapper div, a link to the full image and a
  # "meta" overlay. When +upload+ is given, the image src is pointed at a
  # thumbnail (if one exists and is smaller than the upload) and a srcset is
  # built from the responsive ratios.
  def add_lightbox!(img, original_width, original_height, upload, cropped: false)
    # first, create a div to hold our lightbox
    lightbox = create_node("div", LIGHTBOX_WRAPPER_CSS_CLASS)
    img.add_next_sibling(lightbox)
    lightbox.add_child(img)

    # then, the link to our larger image
    a = create_link_node("lightbox", img["src"])
    img.add_next_sibling(a)

    if upload
      a["data-download-href"] = Discourse.store.download_url(upload)
    end

    a.add_child(img)

    # replace the image by its thumbnail
    w, h = img["width"].to_i, img["height"].to_i

    if upload
      thumbnail = upload.thumbnail(w, h)
      if thumbnail && thumbnail.filesize.to_i < upload.filesize
        img["src"] = thumbnail.url

        srcset = +""

        each_responsive_ratio do |ratio|
          resized_w = (w * ratio).to_i
          resized_h = (h * ratio).to_i
          # "1.5" stays "1.5", "2.0" becomes "2"
          descriptor = "#{ratio.to_s.sub(/\.0$/, "")}x"

          if !cropped && upload.width && resized_w > upload.width
            # requested size exceeds the upload: use the upload itself
            srcset << ", #{UrlHelper.cook_url(upload.url)} #{descriptor}"
          elsif t = upload.thumbnail(resized_w, resized_h)
            srcset << ", #{UrlHelper.cook_url(t.url)} #{descriptor}"
          end
        end

        # assigned once, after all candidates have been collected
        img["srcset"] = "#{UrlHelper.cook_url(img["src"])}#{srcset}" if srcset.present?
      else
        img["src"] = upload.url
      end

      if small_upload = loading_image(upload)
        img["data-small-upload"] = small_upload.url
      end
    end

    # then, some overlay informations
    meta = create_node("div", "meta")
    img.add_next_sibling(meta)

    filename = get_filename(upload, img["src"])
    informations = +"#{original_width}×#{original_height}"
    informations << " #{upload.human_filesize}" if upload

    a["title"] = CGI.escapeHTML(img["title"] || filename)

    meta.add_child create_icon_node("far-image")
    meta.add_child create_span_node("filename", a["title"])
    meta.add_child create_span_node("informations", informations)
    meta.add_child create_icon_node("discourse-expand")
  end

  # Display name for an image: the basename of +src+ without an upload, the
  # upload's original filename, or a localized label when that filename is
  # "blob" / "blob.png".
  def get_filename(upload, src)
    return File.basename(src) unless upload
    return upload.original_filename unless upload.original_filename =~ /^blob(\.png)?$/i
    I18n.t("upload.pasted_image_filename")
  end

  # New element named +tag_name+ owned by @doc, with an optional class.
  def create_node(tag_name, klass)
    node = Nokogiri::XML::Node.new(tag_name, @doc)
    node["class"] = klass if klass.present?
    node
  end

  # New <span> with class +klass+ and, when given, text +content+.
  def create_span_node(klass, content = nil)
    span = create_node("span", klass)
    span.content = content if content
    span
  end

  # New <svg> icon element for the icon named +klass+.
  def create_icon_node(klass)
    icon = create_node("svg", "fa d-icon d-icon-#{klass} svg-icon")
    icon.set_attribute("aria-hidden", "true")
    # NOTE(review): an empty string is appended here, which adds no children.
    # If inner markup was intended it is not present in this file -- confirm.
    # The expression is kept because its value is this method's return value.
    icon << ""
  end

  # New <a href=url> with class +klass+; external links also open in a new
  # tab with rel="nofollow noopener".
  def create_link_node(klass, url, external = false)
    a = create_node("a", klass)
    a["href"] = url
    if external
      a["target"] = "_blank"
      a["rel"] = "nofollow noopener"
    end
    a
  end

  # Stores the src of the first eligible image (truncated to 255 characters)
  # as the post's image_url and, for a first post, the topic's image_url.
  def update_post_image
    img = extract_images_for_post.first
    return if img.blank?

    if img["src"].present?
      @post.update_column(:image_url, img["src"][0...255]) # post
      @post.topic.update_column(:image_url, img["src"][0...255]) if @post.is_first_post? # topic
    end
  end

  # Expands oneboxes and inline oneboxes (up to max_oneboxes_per_post), then
  # sizes/wraps the images they contain and, when nofollow is not enforced,
  # strips the rel attribute from onebox links.
  def post_process_oneboxes
    limit = SiteSetting.max_oneboxes_per_post
    oneboxes = {}
    inline_oneboxes = {}

    Oneboxer.apply(@doc, extra_paths: [".#{INLINE_ONEBOX_LOADING_CSS_CLASS}"]) do |url, element|
      is_onebox = element["class"] == Oneboxer::ONEBOX_CSS_CLASS
      map = is_onebox ? oneboxes : inline_oneboxes
      # a url that was already expanded does not count against the limit check
      skip_onebox = limit <= 0 && !map[url]

      if skip_onebox
        if is_onebox
          element.remove_class('onebox')
        else
          remove_inline_onebox_loading_class(element)
        end

        next
      end

      limit -= 1
      map[url] = true

      if is_onebox
        @has_oneboxes = true

        Oneboxer.onebox(url,
          invalidate_oneboxes: !!@opts[:invalidate_oneboxes],
          user_id: @post&.user_id,
          category_id: @post&.topic&.category_id
        )
      else
        process_inline_onebox(element)
        false
      end
    end

    oneboxed_images.each do |img|
      next if img["src"].blank?

      src = img["src"].sub(/^https?:/i, "")
      parent = img.parent
      img_classes = (img["class"] || "").split(" ")
      link_classes = ((parent&.name == "a" && parent["class"]) || "").split(" ")

      if img_classes.include?("onebox") || link_classes.include?("onebox")
        next if add_image_placeholder!(img)
      elsif large_images.include?(src) || broken_images.include?(src)
        img.remove
        next
      end

      upload_id = downloaded_images[src]
      upload = Upload.find_by_id(upload_id) if upload_id
      img["src"] = upload.url if upload.present?

      # make sure we grab dimensions for oneboxed images
      # and wrap in a div
      limit_size!(img)

      next if img["class"]&.include?('onebox-avatar')

      # when the image is linked, look at the link's container instead
      parent = parent&.parent if parent&.name == "a"
      parent_class = parent && parent["class"]
      width = img["width"].to_i
      height = img["height"].to_i

      if parent_class&.include?("onebox-body") && width > 0 && height > 0
        # special instruction for width == height, assume we are dealing with an avatar
        if width == height && inside_whitelisted_generic_onebox?(img)
          img["class"] = img["class"].to_s + " onebox-avatar"
          next
        end

        if width < 64 && height < 64
          img["class"] = img["class"].to_s + " onebox-full-image"
        else
          img.delete('width')
          img.delete('height')
          # NOTE(review): the markup string originally passed to
          # add_next_sibling here contained only a newline, producing a text
          # node that cannot hold the image. The wrapper is rebuilt as an
          # element carrying the same --aspect-ratio style used in the branch
          # below; the "aspect-image" class name is inferred from
          # "aspect-image-full-size" -- confirm against the stylesheet.
          wrapper = create_node("div", "aspect-image")
          wrapper["style"] = "--aspect-ratio:#{width}/#{height};"
          img.add_next_sibling(wrapper)
          wrapper.add_child(img)
        end
      elsif (parent_class&.include?("instagram-images") || parent_class&.include?("tweet-images") || parent_class&.include?("scale-images")) && width > 0 && height > 0
        img.remove_attribute("width")
        img.remove_attribute("height")
        parent["class"] = "aspect-image-full-size"
        parent["style"] = "--aspect-ratio:#{width}/#{height};"
      end
    end

    if @cooking_options[:omit_nofollow] || !SiteSetting.add_rel_nofollow_to_user_content
      @doc.css(".onebox-body a, .onebox a").each { |a| a.remove_attribute("rel") }
    end
  end

  # Passes every link href / data-download-href and every image
  # src / data-small-upload through UrlHelper.cook_url.
  def optimize_urls
    %w{href data-download-href}.each do |selector|
      @doc.css("a[#{selector}]").each do |a|
        a[selector] = UrlHelper.cook_url(a[selector].to_s)
      end
    end

    %w{src data-small-upload}.each do |selector|
      @doc.css("img[#{selector}]").each do |img|
        img[selector] = UrlHelper.cook_url(img[selector].to_s)
      end
    end
  end

  # Removes the "u" query parameter from links pointing at the current
  # hostname. Links that cannot be parsed are left untouched.
  def remove_user_ids
    @doc.css("a[href]").each do |a|
      uri = begin
        URI(a["href"])
      rescue URI::Error
        next
      end

      next if uri.hostname != Discourse.current_hostname

      query = Rack::Utils.parse_nested_query(uri.query)
      next if !query.delete("u")

      # parse_nested_query decodes keys and values, so the query must be
      # re-encoded when rebuilt; plain "#{k}=#{v}" interpolation corrupted
      # values containing "&", "=" or spaces as well as nested parameters.
      uri.query = Rack::Utils.build_nested_query(query).presence
      a["href"] = uri.to_s
    end
  end

  # Adds rel=nofollow to user content unless disabled by the cooking options
  # or the site setting.
  def enforce_nofollow
    if !@cooking_options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
      PrettyText.add_rel_nofollow_to_user_content(@doc)
    end
  end

  # (Re)schedules the job that downloads hotlinked images for this post,
  # unless the post is too old or was last edited by a non-positive
  # (system) editor id.
  def pull_hotlinked_images(bypass_bump = false)
    # have we enough disk space?
    disable_if_low_on_disk_space # But still enqueue the job
    # don't download remote images for posts that are more than n days old
    return unless @post.created_at > (Date.today - SiteSetting.download_remote_images_max_days_old)
    # we only want to run the job whenever it's changed by a user
    return if @post.last_editor_id && @post.last_editor_id <= 0
    # make sure no other job is scheduled
    Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
    # schedule the job
    delay = SiteSetting.editing_grace_period + 1
    Jobs.enqueue_in(delay.seconds.to_i, :pull_hotlinked_images, post_id: @post.id, bypass_bump: bypass_bump)
  end

  # Turns off download_remote_images_to_local when the available disk space
  # is below the configured threshold on a non-external store, logging the
  # change and notifying the site contact. Returns true when the setting was
  # disabled, false otherwise.
  def disable_if_low_on_disk_space
    return false if !SiteSetting.download_remote_images_to_local
    return false if available_disk_space >= SiteSetting.download_remote_images_threshold
    return false if Discourse.store.external?

    SiteSetting.download_remote_images_to_local = false

    # log the site setting change
    reason = I18n.t("disable_remote_images_download_reason")
    staff_action_logger = StaffActionLogger.new(Discourse.system_user)
    staff_action_logger.log_site_setting_change("download_remote_images_to_local", true, false, details: reason)

    # also send a private message to the site contact user
    notify_about_low_disk_space

    true
  end

  # Sends the :download_remote_images_disabled system message to the site
  # contact user.
  def notify_about_low_disk_space
    SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled)
  end

  # 100 minus the fifth column of the last line of `df -P` for the uploads
  # directory (the capacity/use percentage in POSIX output format).
  def available_disk_space
    100 - `df -P #{Rails.root}/public/uploads | tail -1 | tr -s ' ' | cut -d ' ' -f 5`.to_i
  end

  # True when the processed HTML differs from the post's previous cooked HTML.
  def dirty?
    @previous_cooked != html
  end

  # The processed document serialized to HTML.
  def html
    @doc.try(:to_html)
  end

  private

  # Applies placeholder, sizing and lightbox processing to each eligible image.
  def post_process_images
    extract_images.each do |img|
      unless add_image_placeholder!(img)
        limit_size!(img)
        convert_to_link!(img)
      end
    end
  end

  # Replaces the content of an inline-onebox link with the looked-up title
  # (HTML-escaped) when one is found, then clears its loading class.
  def process_inline_onebox(element)
    inline_onebox = InlineOneboxer.lookup(
      element.attributes["href"].value,
      invalidate: !!@opts[:invalidate_oneboxes]
    )

    if title = inline_onebox&.dig(:title)
      element.children = CGI.escapeHTML(title)
      element.add_class(INLINE_ONEBOX_CSS_CLASS)
    end

    remove_inline_onebox_loading_class(element)
  end

  def remove_inline_onebox_loading_class(element)
    element.remove_class(INLINE_ONEBOX_LOADING_CSS_CLASS)
  end

  # True when the path of the image's src ends in ".svg"; nil when the src
  # cannot be parsed or has no path.
  def is_svg?(img)
    path =
      begin
        URI(img["src"]).path
      rescue URI::Error
        nil
      end

    File.extname(path) == '.svg' if path
  end

  # Parses the JSON list stored in the post custom field +key+.
  # Returns [] when the field is blank or does not contain valid JSON.
  def image_list_from_custom_field(key)
    JSON.parse(@post.custom_fields[key].presence || "[]")
  rescue JSON::ParserError
    []
  end

  # True when any ancestor of +img+ has a class containing "whitelistedgeneric".
  def inside_whitelisted_generic_onebox?(img)
    img.ancestors.any? { |node| node["class"]&.include?("whitelistedgeneric") }
  end
end