FEATURE: option to enable inline oneboxes for all domains

Also, prefer the page's <title> over the Open Graph title, which is often far too sparse
This commit is contained in:
Sam 2017-08-02 14:27:21 -04:00
parent 582ae9ab8d
commit f6bc572fb8
6 changed files with 43 additions and 11 deletions

View File

@ -986,6 +986,7 @@ en:
post_onebox_maxlength: "Maximum length of a oneboxed Discourse post in characters."
onebox_domains_blacklist: "A list of domains that will never be oneboxed."
inline_onebox_domains_whitelist: "A list of domains that will be oneboxed in miniature form if linked without a title."
enable_inline_onebox_on_all_domains: "Ignore inline_onebox_domains_whitelist site setting and allow inline onebox on all domains."
max_oneboxes_per_post: "Maximum number of oneboxes in a post."
logo_url: "The logo image at the top left of your site, should be a wide rectangle shape. If left blank site title text will be shown."

View File

@ -949,6 +949,8 @@ onebox:
inline_onebox_domains_whitelist:
default: ''
type: list
enable_inline_onebox_on_all_domains:
default: false
spam:
add_rel_nofollow_to_user_content: true

View File

@ -36,13 +36,15 @@ class InlineOneboxer
end
end
if whitelist = SiteSetting.inline_onebox_domains_whitelist
always_allow = SiteSetting.enable_inline_onebox_on_all_domains
domains = SiteSetting.inline_onebox_domains_whitelist&.split('|') unless always_allow
if always_allow || domains
uri = URI(url) rescue nil
domains = whitelist.split('|')
if uri.present? &&
uri.hostname.present? &&
domains.include?(uri.hostname) &&
(always_allow || domains.include?(uri.hostname)) &&
title = RetrieveTitle.crawl(url)
return onebox_for(url, title, opts)
end

View File

@ -13,11 +13,11 @@ module RetrieveTitle
title = nil
if doc = Nokogiri::HTML(html)
if node = doc.at('meta[property="og:title"]')
title = doc.at('title')&.inner_text
if !title && node = doc.at('meta[property="og:title"]')
title = node['content']
end
title ||= doc.at('title')&.inner_text
end
if title.present?
@ -42,9 +42,6 @@ module RetrieveTitle
# Fetch the beginning of a HTML document at a url
def self.fetch_beginning(url)
# Never crawl in test mode
return if Rails.env.test?
fd = FinalDestination.new(url)
uri = fd.resolve
return "" unless uri

View File

@ -68,6 +68,25 @@ describe InlineOneboxer do
expect(onebox).to be_blank
end
# With enable_inline_onebox_on_all_domains switched on (and no whitelist set in
# this example), the lookup should crawl the URL and use the page's <title>.
it "will crawl anything if allowed to" do
  SiteSetting.enable_inline_onebox_on_all_domains = true

  url = "https://eviltrout.com/some-path"
  page = "<html><head><title>a blog</title></head></html>"

  # Both verbs are stubbed because final destination does a HEAD and a GET
  stub_request(:head, url).to_return(status: 200)
  stub_request(:get, url).to_return(status: 200, body: page, headers: {})

  result = InlineOneboxer.lookup(url, skip_cache: true)

  expect(result).to be_present
  expect(result[:url]).to eq(url)
  expect(result[:title]).to eq("a blog")
end
it "will lookup whitelisted domains" do
SiteSetting.inline_onebox_domains_whitelist = "eviltrout.com"
RetrieveTitle.stubs(:crawl).returns("Evil Trout's Blog")

View File

@ -21,11 +21,22 @@ describe RetrieveTitle do
expect(title).to eq("Another Title")
end
# Fixture has no <title> element, so the og:title content is the expected result.
it "will pick og:title if title is missing" do
  # NOTE(review): the meta tag below is not terminated with ">"; it is kept
  # exactly as in the original fixture -- confirm whether that is intentional.
  html = <<~HTML
    <html>
    <meta property="og:title" content="Good Title"
    </html>
  HTML

  expect(RetrieveTitle.extract_title(html)).to eq("Good Title")
end
it "will prefer the title from an opengraph tag" do
title = RetrieveTitle.extract_title(<<~HTML
<html>
<title>Bad Title</title>
<meta property="og:title" content="Good Title" />
<title>Good Title</title>
<meta property="og:title" content="Bad Title"
</html>
HTML
)