# frozen_string_literal: true require "uri" Dir["#{Rails.root}/lib/onebox/engine/*_onebox.rb"].sort.each { |f| require f } module Oneboxer ONEBOX_CSS_CLASS = "onebox" AUDIO_REGEX = /\A\.(mp3|og[ga]|opus|wav|m4[abpr]|aac|flac)\z/i VIDEO_REGEX = /\A\.(mov|mp4|webm|m4v|3gp|ogv|avi|mpeg|ogv)\z/i # keep reloaders happy unless defined?(Oneboxer::Result) Result = Struct.new(:doc, :changed) do def to_html doc.to_html end def changed? changed end end end def self.ignore_redirects @ignore_redirects ||= [ "http://www.dropbox.com", "http://store.steampowered.com", "http://vimeo.com", "https://www.youtube.com", Discourse.base_url, ] end def self.amazon_domains amazon_suffixes = %w[ com com.br ca cn fr de in it co.jp com.mx nl pl sa sg es se com.tr ae co.uk ] amazon_suffixes.collect { |suffix| "https://www.amazon.#{suffix}" } end def self.force_get_hosts hosts = [] hosts += SiteSetting.force_get_hosts.split("|").collect { |domain| "https://#{domain}" } hosts += SiteSetting .cache_onebox_response_body_domains .split("|") .collect { |domain| "https://www.#{domain}" } hosts += amazon_domains hosts.uniq end def self.force_custom_user_agent_hosts SiteSetting.force_custom_user_agent_hosts.split("|") end def self.allowed_post_types @allowed_post_types ||= [Post.types[:regular], Post.types[:moderator_action]] end def self.local_handlers @local_handlers ||= {} end def self.register_local_handler(controller, &handler) local_handlers[controller] = handler end def self.preview(url, options = nil) options ||= {} invalidate(url) if options[:invalidate_oneboxes] onebox_raw(url, options)[:preview] end def self.onebox(url, options = nil) options ||= {} invalidate(url) if options[:invalidate_oneboxes] onebox_raw(url, options)[:onebox] end def self.cached_onebox(url) if c = Discourse.cache.read(onebox_cache_key(url)) c[:onebox] end rescue => e invalidate(url) Rails.logger.warn("invalid cached onebox for #{url} #{e}") "" end def self.cached_preview(url) if c = Discourse.cache.read(onebox_cache_key(url)) c[:preview] end rescue => e invalidate(url) Rails.logger.warn("invalid cached preview for #{url} #{e}") "" end def self.invalidate(url) Discourse.cache.delete(onebox_cache_key(url)) Discourse.cache.delete(onebox_failed_cache_key(url)) end def self.cache_response_body?(uri) uri = URI.parse(uri) if uri.is_a?(String) if SiteSetting.cache_onebox_response_body? SiteSetting .cache_onebox_response_body_domains .split("|") .any? { |domain| uri.hostname.ends_with?(domain) } end end def self.cache_response_body(uri, response) key = redis_cached_response_body_key(uri) Discourse.redis.without_namespace.setex(key, 1.minutes.to_i, response) end def self.cached_response_body_exists?(uri) key = redis_cached_response_body_key(uri) Discourse.redis.without_namespace.exists(key).to_i > 0 end def self.fetch_cached_response_body(uri) key = redis_cached_response_body_key(uri) Discourse.redis.without_namespace.get(key) end def self.redis_cached_response_body_key(uri) "CACHED_RESPONSE_#{uri}" end # Parse URLs out of HTML, returning the document when finished. def self.each_onebox_link(doc, extra_paths: []) onebox_links = doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths) if onebox_links.present? onebox_links.each { |link| yield(link["href"], link) if link["href"].present? } end doc end HTML5_BLOCK_ELEMENTS ||= %w[ address article aside blockquote canvas center dd div dl dt fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr li main nav noscript ol output p pre section table tfoot ul video ] def self.apply(string_or_doc, extra_paths: nil) doc = string_or_doc doc = Loofah.html5_fragment(doc) if doc.is_a?(String) changed = false each_onebox_link(doc, extra_paths: extra_paths) do |url, element| onebox, _ = yield(url, element) next if onebox.blank? parsed_onebox = Loofah.html5_fragment(onebox) next if parsed_onebox.children.blank? changed = true parent = element.parent if parent&.node_name&.downcase == "p" && parsed_onebox.children.any? { |child| HTML5_BLOCK_ELEMENTS.include?(child.node_name.downcase) } siblings = parent.children element_idx = siblings.find_index(element) before_idx = first_significant_element_index(siblings, element_idx - 1, -1) after_idx = first_significant_element_index(siblings, element_idx + 1, +1) if before_idx < 0 && after_idx >= siblings.size parent.replace parsed_onebox elsif before_idx < 0 parent.children = siblings[after_idx..siblings.size] parent.add_previous_sibling(parsed_onebox) elsif after_idx >= siblings.size parent.children = siblings[0..before_idx] parent.add_next_sibling(parsed_onebox) else parent_rest = parent.dup parent.children = siblings[0..before_idx] parent_rest.children = siblings[after_idx..siblings.size] parent.add_next_sibling(parent_rest) parent.add_next_sibling(parsed_onebox) end else element.replace parsed_onebox end end Result.new(doc, changed) end def self.first_significant_element_index(elements, index, step) while index >= 0 && index < elements.size && ( elements[index].node_name.downcase == "br" || ( elements[index].node_name.downcase == "text" && elements[index].to_html.strip.blank? ) ) index = index + step end index end def self.is_previewing?(user_id) Discourse.redis.get(preview_key(user_id)) == "1" end def self.preview_onebox!(user_id) Discourse.redis.setex(preview_key(user_id), 1.minute, "1") end def self.onebox_previewed!(user_id) Discourse.redis.del(preview_key(user_id)) end def self.engine(url) Onebox::Matcher.new( url, { allowed_iframe_regexes: Onebox::Engine.origins_to_regexes(allowed_iframe_origins) }, ).oneboxed end def self.recently_failed?(url) Discourse.cache.read(onebox_failed_cache_key(url)).present? end def self.cache_failed!(url) Discourse.cache.write(onebox_failed_cache_key(url), true, expires_in: 1.hour) end private def self.preview_key(user_id) "onebox:preview:#{user_id}" end def self.blank_onebox { preview: "", onebox: "" } end def self.onebox_cache_key(url) "onebox__#{url}" end def self.onebox_failed_cache_key(url) "onebox_failed__#{url}" end def self.onebox_raw(url, opts = {}) url = UrlHelper.normalized_encode(url).to_s local_onebox(url, opts) || external_onebox(url) rescue => e # no point warning here, just cause we have an issue oneboxing a url # we can later hunt for failed oneboxes by searching logs if needed Rails.logger.info("Failed to onebox #{url} #{e} #{e.backtrace}") # return a blank hash, so rest of the code works blank_onebox end def self.local_onebox(url, opts = {}) return unless route = Discourse.route_for(url) html = case route[:controller] when "uploads" local_upload_html(url) when "topics" local_topic_html(url, route, opts) when "users" local_user_html(url, route) when "list" local_category_html(url, route) else if handler = local_handlers[route[:controller]] handler.call(url, route) end end normalized_url = ::Onebox::Helpers.normalize_url_for_output(URI(url).to_s) html = html.presence || "#{normalized_url}" { onebox: html, preview: html } end def self.local_upload_html(url) additional_controls = if SiteSetting.disable_onebox_media_download_controls "controlslist='nodownload'" else "" end normalized_url = ::Onebox::Helpers.normalize_url_for_output(url) case File.extname(URI(url).path || "") when VIDEO_REGEX <<~HTML
HTML when AUDIO_REGEX <<~HTML HTML end end def self.local_topic(url, route, opts) if current_user = User.find_by(id: opts[:user_id]) if current_category = Category.find_by(id: opts[:category_id]) return unless Guardian.new(current_user).can_see_category?(current_category) end if current_topic = Topic.find_by(id: opts[:topic_id]) return unless Guardian.new(current_user).can_see_topic?(current_topic) end end return unless topic = Topic.find_by(id: route[:id] || route[:topic_id]) return if topic.private_message? if current_category.blank? || current_category.id != topic.category_id return unless Guardian.new.can_see_topic?(topic) end topic end def self.local_topic_html(url, route, opts) return unless topic = local_topic(url, route, opts) post_number = route[:post_number].to_i post = ( if post_number > 1 topic.posts.where(post_number: post_number).first else topic.ordered_posts.first end ) return if !post || post.hidden || !allowed_post_types.include?(post.post_type) if post_number > 1 && opts[:topic_id] == topic.id excerpt = post.excerpt(SiteSetting.post_onebox_maxlength, keep_svg: true) excerpt.gsub!(/[\r\n]+/, " ") excerpt.gsub!("[/quote]", "[quote]") # don't break my quote quote = "[quote=\"#{post.user.username}, topic:#{topic.id}, post:#{post.post_number}\"]\n#{excerpt}\n[/quote]" PrettyText.cook(quote) else args = { topic_id: topic.id, post_number: post.post_number, avatar: PrettyText.avatar_img(post.user.avatar_template_url, "tiny"), original_url: url, title: PrettyText.unescape_emoji(CGI.escapeHTML(topic.title)), category_html: CategoryBadge.html_for(topic.category), quote: PrettyText.unescape_emoji( post.excerpt(SiteSetting.post_onebox_maxlength, keep_svg: true), ), } template = template("discourse_topic_onebox") Mustache.render(template, args) end end def self.local_user_html(url, route) username = route[:username] || "" if user = User.find_by(username_lower: username.downcase) name = user.name if SiteSetting.enable_names args = { user_id: user.id, username: user.username, avatar: PrettyText.avatar_img(user.avatar_template, "huge"), name: name, bio: user.user_profile.bio_excerpt(230), location: Onebox::Helpers.sanitize(user.user_profile.location), joined: I18n.t( "onebox.discourse.user_joined_community", date: user.created_at.strftime(I18n.t("datetime_formats.formats.date_only")), ), website: user.user_profile.website, website_name: UserSerializer.new(user).website_name, original_url: url, } Mustache.render(template("discourse_user_onebox"), args) else nil end end def self.local_category_html(url, route) return unless route[:category_slug_path_with_id] category = Category.find_by_slug_path_with_id(route[:category_slug_path_with_id]) if Guardian.new.can_see_category?(category) args = { url: category.url, name: category.name, color: category.color, logo_url: category.uploaded_logo&.url, description: category.description, has_subcategories: category.subcategories.present?, subcategories: category.subcategories.collect { |sc| { name: sc.name, color: sc.color, url: sc.url } }, } Mustache.render(template("discourse_category_onebox"), args) end end def self.preserve_fragment_url_hosts @preserve_fragment_url_hosts ||= ["http://github.com"] end def self.allowed_iframe_origins allowed = SiteSetting.allowed_onebox_iframes.split("|") allowed = Onebox::Engine.all_iframe_origins if allowed.include?("*") allowed += SiteSetting.allowed_iframes.split("|") end def self.external_onebox(url, available_strategies = nil) Discourse .cache .fetch(onebox_cache_key(url), expires_in: 1.day) do uri = URI(url) available_strategies ||= Oneboxer.ordered_strategies(uri.hostname) strategy = available_strategies.shift max_redirects = 0 if SiteSetting.block_onebox_on_redirect fd = FinalDestination.new( url, get_final_destination_options(url, strategy).merge( stop_at_blocked_pages: true, max_redirects: max_redirects, initial_https_redirect_ignore_limit: SiteSetting.block_onebox_on_redirect, ), ) uri = fd.resolve return blank_onebox if fd.status == :blocked_page if fd.status != :resolved args = { link: url } if fd.status == :invalid_address args[:error_message] = I18n.t("errors.onebox.invalid_address", hostname: fd.hostname) elsif (fd.status_code || uri.nil?) && available_strategies.present? # Try a different oneboxing strategy, if we have any options left: return external_onebox(url, available_strategies) elsif fd.status_code args[:error_message] = I18n.t( "errors.onebox.error_response", status_code: fd.status_code, ) end error_box = blank_onebox error_box[:preview] = preview_error_onebox(args) return error_box end return blank_onebox if uri.blank? onebox_options = { max_width: 695, sanitize_config: Onebox::SanitizeConfig::DISCOURSE_ONEBOX, allowed_iframe_origins: allowed_iframe_origins, hostname: GlobalSetting.hostname, facebook_app_access_token: SiteSetting.facebook_app_access_token, disable_media_download_controls: SiteSetting.disable_onebox_media_download_controls, body_cacher: self, content_type: fd.content_type, } onebox_options[:cookie] = fd.cookie if fd.cookie user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?( url, ) && SiteSetting.cache_onebox_user_agent.present? onebox_options[:user_agent] = user_agent_override if user_agent_override preview_result = Onebox.preview(uri.to_s, onebox_options) result = { onebox: WordWatcher.censor(preview_result.to_s), preview: WordWatcher.censor(preview_result.placeholder_html.to_s), } # NOTE: Call preview_result.errors after calling placeholder_html if preview_result.errors.any? error_keys = preview_result.errors.keys skip_if_only_error = [:image] unless error_keys.length == 1 && skip_if_only_error.include?(error_keys.first) missing_attributes = error_keys.map(&:to_s).sort.join(I18n.t("word_connector.comma")) error_message = I18n.t( "errors.onebox.missing_data", missing_attributes: missing_attributes, count: error_keys.size, ) args = preview_result.verified_data.merge(error_message: error_message) if result[:preview].blank? result[:preview] = preview_error_onebox(args) else doc = Nokogiri::HTML5.fragment(result[:preview]) aside = doc.at("aside") if aside # Add an error message to the preview that was returned error_fragment = preview_error_onebox_fragment(args) aside.add_child(error_fragment) result[:preview] = doc.to_html end end end end Oneboxer.cache_preferred_strategy(uri.hostname, strategy) result end end def self.preview_error_onebox(args, is_fragment = false) args[:title] ||= args[:link] if args[:link] args[:error_message] = PrettyText.unescape_emoji(args[:error_message]) if args[:error_message] template_name = is_fragment ? "preview_error_fragment_onebox" : "preview_error_onebox" Mustache.render(template(template_name), args) end def self.preview_error_onebox_fragment(args) preview_error_onebox(args, true) end def self.template(template_name) @template_cache ||= {} @template_cache[template_name] ||= begin full_path = "#{Rails.root}/lib/onebox/templates/#{template_name}.mustache" File.read(full_path) end end def self.ordered_strategies(hostname) all = strategies.keys preferred = Oneboxer.preferred_strategy(hostname) all.insert(0, all.delete(preferred)) if all.include?(preferred) all end def self.strategies { default: { }, # don't override anything by default force_get_and_ua: { force_get_host: true, force_custom_user_agent_host: true, }, } end def self.cache_preferred_strategy(hostname, strategy) return if strategy == :default key = redis_oneboxer_strategy_key(hostname) Discourse.redis.without_namespace.setex(key, 2.weeks.to_i, strategy.to_s) end def self.clear_preferred_strategy!(hostname) key = redis_oneboxer_strategy_key(hostname) Discourse.redis.without_namespace.del(key) end def self.preferred_strategy(hostname) key = redis_oneboxer_strategy_key(hostname) Discourse.redis.without_namespace.get(key)&.to_sym end def self.redis_oneboxer_strategy_key(hostname) "ONEBOXER_STRATEGY_#{hostname}" end def self.get_final_destination_options(url, strategy = nil) fd_options = { ignore_redirects: ignore_redirects, force_get_hosts: force_get_hosts, force_custom_user_agent_hosts: force_custom_user_agent_hosts, preserve_fragment_url_hosts: preserve_fragment_url_hosts, timeout: 5, } uri = URI(url) strategy = Oneboxer.ordered_strategies(uri.hostname).shift if strategy.blank? if strategy && Oneboxer.strategies[strategy][:force_get_host] fd_options[:force_get_hosts] = ["https://#{uri.hostname}"] end if strategy && Oneboxer.strategies[strategy][:force_custom_user_agent_host] fd_options[:force_custom_user_agent_hosts] = ["https://#{uri.hostname}"] end user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?( url, ) && SiteSetting.cache_onebox_user_agent.present? fd_options[:default_user_agent] = user_agent_override if user_agent_override fd_options end end