diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 3d7fbc45c24..4e7d572a746 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -986,6 +986,7 @@ en: post_onebox_maxlength: "Maximum length of a oneboxed Discourse post in characters." onebox_domains_blacklist: "A list of domains that will never be oneboxed." inline_onebox_domains_whitelist: "A list of domains that will be oneboxed in miniature form if linked without a title" + enable_inline_onebox_on_all_domains: "Ignore inline_onebox_domains_whitelist site setting and allow inline onebox on all domains." max_oneboxes_per_post: "Maximum number of oneboxes in a post." logo_url: "The logo image at the top left of your site, should be a wide rectangle shape. If left blank site title text will be shown." diff --git a/config/site_settings.yml b/config/site_settings.yml index b4c95b37e75..8fe5daf3fde 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -949,6 +949,8 @@ onebox: inline_onebox_domains_whitelist: default: '' type: list + enable_inline_onebox_on_all_domains: + default: false spam: add_rel_nofollow_to_user_content: true diff --git a/lib/inline_oneboxer.rb b/lib/inline_oneboxer.rb index f345bef4d38..4c71cba888d 100644 --- a/lib/inline_oneboxer.rb +++ b/lib/inline_oneboxer.rb @@ -36,13 +36,15 @@ class InlineOneboxer end end - if whitelist = SiteSetting.inline_onebox_domains_whitelist + always_allow = SiteSetting.enable_inline_onebox_on_all_domains + domains = SiteSetting.inline_onebox_domains_whitelist&.split('|') unless always_allow + + if always_allow || domains uri = URI(url) rescue nil - domains = whitelist.split('|') if uri.present? && uri.hostname.present? 
&& - domains.include?(uri.hostname) && + (always_allow || domains.include?(uri.hostname)) && title = RetrieveTitle.crawl(url) return onebox_for(url, title, opts) end diff --git a/lib/retrieve_title.rb b/lib/retrieve_title.rb index 62d5498c157..e68809a09f0 100644 --- a/lib/retrieve_title.rb +++ b/lib/retrieve_title.rb @@ -13,11 +13,11 @@ module RetrieveTitle title = nil if doc = Nokogiri::HTML(html) - if node = doc.at('meta[property="og:title"]') + title = doc.at('title')&.inner_text + + if !title && node = doc.at('meta[property="og:title"]') title = node['content'] end - - title ||= doc.at('title')&.inner_text end if title.present? @@ -42,9 +42,6 @@ module RetrieveTitle # Fetch the beginning of a HTML document at a url def self.fetch_beginning(url) - # Never crawl in test mode - return if Rails.env.test? - fd = FinalDestination.new(url) uri = fd.resolve return "" unless uri diff --git a/spec/components/inline_oneboxer_spec.rb b/spec/components/inline_oneboxer_spec.rb index d58a0d3a7e3..491f322a0cd 100644 --- a/spec/components/inline_oneboxer_spec.rb +++ b/spec/components/inline_oneboxer_spec.rb @@ -68,6 +68,25 @@ describe InlineOneboxer do expect(onebox).to be_blank end + it "will crawl anything if allowed to" do + SiteSetting.enable_inline_onebox_on_all_domains = true + + # Final destination does a HEAD and a GET + stub_request(:head, "https://eviltrout.com/some-path").to_return(status: 200) + + stub_request(:get, "https://eviltrout.com/some-path"). 
+ to_return(status: 200, body: "a blog", headers: {}) + + onebox = InlineOneboxer.lookup( + "https://eviltrout.com/some-path", + skip_cache: true + ) + + expect(onebox).to be_present + expect(onebox[:url]).to eq("https://eviltrout.com/some-path") + expect(onebox[:title]).to eq("a blog") + end + it "will lookup whitelisted domains" do SiteSetting.inline_onebox_domains_whitelist = "eviltrout.com" RetrieveTitle.stubs(:crawl).returns("Evil Trout's Blog") diff --git a/spec/components/retrieve_title_spec.rb b/spec/components/retrieve_title_spec.rb index 351a28acfab..a65be0b34d7 100644 --- a/spec/components/retrieve_title_spec.rb +++ b/spec/components/retrieve_title_spec.rb @@ -21,11 +21,22 @@ describe RetrieveTitle do expect(title).to eq("Another Title") end + it "will pick og:title if title is missing" do + title = RetrieveTitle.extract_title(<<~HTML + + + HTML + ) + + expect(title).to eq("Good Title") + end + it "will prefer the title from an opengraph tag" do title = RetrieveTitle.extract_title(<<~HTML - Bad Title - + Good Title + HTML )