Onebox improved error handling and support for Instagram Access Tokens (#11253)

* FEATURE: display error if Oneboxing fails due to HTTP error

- display warning if onebox URL is unresolvable
- display warning if attributes are missing

* FEATURE: Use new Instagram oEmbed endpoint if access token is configured

Instagram requires an Access Token to access their oEmbed endpoint. The requirements (from https://developers.facebook.com/docs/instagram/oembed/) are as follows:

- a Facebook Developer account, which you can create at developers.facebook.com
- a registered Facebook app
- the oEmbed Product added to the app
- an Access Token
- The Facebook app must be in Live Mode

The generated Access Token, once added to SiteSetting.facebook_app_access_token, will be passed to onebox. Onebox can then use this token to access the oEmbed endpoint to generate a onebox for Instagram.

* DEV: update user agent string

* DEV: don’t do HEAD requests against news.yahoo.com

* DEV: Bump onebox version from 2.1.5 to 2.1.6

* DEV: Avoid re-reading templates

* DEV: Tweaks to onebox mustache templates

* DEV: simplified error message for missing onebox data

* Apply suggestions from code review
Co-authored-by: Gerhard Schlager <mail@gerhard-schlager.at>
This commit is contained in:
jbrw 2020-11-18 12:55:16 -05:00 committed by GitHub
parent 8e8dca9246
commit 331236d6d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 351 additions and 30 deletions

View File

@ -235,7 +235,7 @@ GEM
omniauth-twitter (1.4.0)
omniauth-oauth (~> 1.1)
rack
onebox (2.1.5)
onebox (2.1.6)
addressable (~> 2.7.0)
htmlentities (~> 4.3)
multi_json (~> 1.11)

View File

@ -379,6 +379,16 @@ pre.onebox code {
white-space: normal;
}
.onebox-warning-message {
margin-top: 5px;
color: var(--primary-med-or-secondary-med);
img.emoji {
width: 20px;
height: 20px;
float: none;
}
}
// Onebox - Github - PR, Commit & Issue
.onebox.githubpullrequest,
.onebox.githubcommit,

View File

@ -206,6 +206,16 @@ en:
cannot_enable_s3_uploads_when_s3_enabled_globally: "You cannot enable S3 uploads because S3 uploads are already globally enabled, and enabling this site-level could cause critical issues with uploads"
cors_origins_should_not_have_trailing_slash: "You should not add the trailing slash (/) to CORS origins."
conflicting_google_user_id: 'The Google Account ID for this account has changed; staff intervention is required for security reasons. Please contact staff and point them to <br><a href="https://meta.discourse.org/t/76575">https://meta.discourse.org/t/76575</a>'
onebox:
invalid_address: "Sorry, we were unable to generate a preview for this web page, because the server '%{hostname}' could not be found. Instead of a preview, only a link will appear in your post. :cry:"
error_response: "Sorry, we were unable to generate a preview for this web page, because the web server returned an error code of %{status_code}. Instead of a preview, only a link will appear in your post. :cry:"
missing_data:
one: "Sorry, we were unable to generate a preview for this web page, because the following oEmbed / OpenGraph tag could not be found: %{missing_attributes}"
other: "Sorry, we were unable to generate a preview for this web page, because the following oEmbed / OpenGraph tags could not be found: %{missing_attributes}"
word_connector:
# Connects words with a comma. Example: "foo, bar"
comma: ", "
activemodel:
errors:
@ -1491,6 +1501,7 @@ en:
enable_inline_onebox_on_all_domains: "Ignore inline_onebox_domain_allowlist site setting and allow inline onebox on all domains."
force_custom_user_agent_hosts: "Hosts for which to use the custom onebox user agent on all requests. (Especially useful for hosts that limit access by user agent)."
max_oneboxes_per_post: "Maximum number of oneboxes in a post."
facebook_app_access_token: "A token generated from your Facebook app ID and secret. Used to generate Instagram oneboxes."
logo: "The logo image at the top left of your site. Use a wide rectangular image with a height of 120 and an aspect ratio greater than 3:1. If left blank, the site title text will be shown."
logo_small: "The small logo image at the top left of your site, seen when scrolling down. Use a square 120 × 120 image. If left blank, a home glyph will be shown."

View File

@ -1599,6 +1599,9 @@ onebox:
force_custom_user_agent_hosts:
default: "http://codepen.io"
type: list
facebook_app_access_token:
default: ""
secret: true
spam:
add_rel_nofollow_to_user_content: true
hide_post_sensitivity:

View File

@ -67,7 +67,7 @@ class FinalDestination
@timeout = @opts[:timeout] || nil
@preserve_fragment_url = @preserve_fragment_url_hosts.any? { |host| hostname_matches?(host) }
@validate_uri = @opts.fetch(:validate_uri) { true }
@user_agent = @force_custom_user_agent_hosts.any? { |host| hostname_matches?(host) } ? Onebox.options.user_agent : "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
@user_agent = @force_custom_user_agent_hosts.any? { |host| hostname_matches?(host) } ? Onebox.options.user_agent : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15"
end
def self.connection_timeout
@ -170,6 +170,7 @@ class FinalDestination
end
unless validate_uri
@status = :invalid_address
log(:warn, "FinalDestination could not resolve URL (invalid URI): #{@uri}") if @verbose
return nil
end
@ -276,6 +277,10 @@ class FinalDestination
(IPAddr.new(@uri.hostname) rescue nil).nil?
end
def hostname
@uri.hostname
end
def hostname_matches?(url)
url = uri(url)
@uri && url.present? && @uri.hostname == url&.hostname

View File

@ -28,5 +28,5 @@
{{/bio}}
<span>{{joined}} {{created_at}}</span>
</article>
<div style="clear: both"></div>
<div class="clearfix"></div>
</aside>

View File

@ -0,0 +1,4 @@
<article class="onebox-warning-message">
{{{error_message}}}
</article>
<div class="clearfix"></div>

View File

@ -0,0 +1,10 @@
<aside class="onebox preview-error">
{{#favicon}}
<img src="{{favicon}}" class="site-icon"/>
{{/favicon}}
<a href="{{link}}" target='_blank' rel='noopener noreferrer'>{{title}}</a>
<article class="onebox-warning-message">
{{{error_message}}}
</article>
<div class="clearfix"></div>
</aside>

View File

@ -27,7 +27,7 @@ module Oneboxer
end
def self.force_get_hosts
@force_get_hosts ||= ['http://us.battle.net']
@force_get_hosts ||= ['http://us.battle.net', 'https://news.yahoo.com/']
end
def self.force_custom_user_agent_hosts
@ -261,7 +261,7 @@ module Oneboxer
quote: PrettyText.unescape_emoji(post.excerpt(SiteSetting.post_onebox_maxlength)),
}
template = File.read("#{Rails.root}/lib/onebox/templates/discourse_topic_onebox.mustache")
template = template("discourse_topic_onebox")
Mustache.render(template, args)
end
end
@ -287,8 +287,7 @@ module Oneboxer
original_url: url
}
template = File.read("#{Rails.root}/lib/onebox/templates/discourse_user_onebox.mustache")
Mustache.render(template, args)
Mustache.render(template("discourse_user_onebox"), args)
else
nil
end
@ -313,12 +312,26 @@ module Oneboxer
def self.external_onebox(url)
Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
fd = FinalDestination.new(url,
ignore_redirects: ignore_redirects,
ignore_hostnames: blocked_domains,
force_get_hosts: force_get_hosts,
force_custom_user_agent_hosts: force_custom_user_agent_hosts,
preserve_fragment_url_hosts: preserve_fragment_url_hosts)
ignore_redirects: ignore_redirects,
ignore_hostnames: blocked_domains,
force_get_hosts: force_get_hosts,
force_custom_user_agent_hosts: force_custom_user_agent_hosts,
preserve_fragment_url_hosts: preserve_fragment_url_hosts)
uri = fd.resolve
if fd.status != :resolved
args = { link: url }
if fd.status == :invalid_address
args[:error_message] = I18n.t("errors.onebox.invalid_address", hostname: fd.hostname)
elsif fd.status_code
args[:error_message] = I18n.t("errors.onebox.error_response", status_code: fd.status_code)
end
error_box = blank_onebox
error_box[:preview] = preview_error_onebox(args)
return error_box
end
return blank_onebox if uri.blank? || blocked_domains.map { |hostname| uri.hostname.match?(hostname) }.any?
options = {
@ -326,13 +339,56 @@ module Oneboxer
sanitize_config: Onebox::DiscourseOneboxSanitizeConfig::Config::DISCOURSE_ONEBOX,
allowed_iframe_origins: allowed_iframe_origins,
hostname: GlobalSetting.hostname,
facebook_app_access_token: SiteSetting.facebook_app_access_token,
}
options[:cookie] = fd.cookie if fd.cookie
r = Onebox.preview(uri.to_s, options)
result = { onebox: r.to_s, preview: r&.placeholder_html.to_s }
{ onebox: r.to_s, preview: r&.placeholder_html.to_s }
# NOTE: Call r.errors after calling placeholder_html
if r.errors.any?
missing_attributes = r.errors.keys.map(&:to_s).sort.join(I18n.t("word_connector.comma"))
error_message = I18n.t("errors.onebox.missing_data", missing_attributes: missing_attributes, count: r.errors.keys.size)
args = r.data.merge(error_message: error_message)
if result[:preview].blank?
result[:preview] = preview_error_onebox(args)
else
doc = Nokogiri::HTML5::fragment(result[:preview])
aside = doc.at('aside')
if aside
# Add an error message to the preview that was returned
error_fragment = preview_error_onebox_fragment(args)
aside.add_child(error_fragment)
result[:preview] = doc.to_html
end
end
end
result
end
end
def self.preview_error_onebox(args, is_fragment = false)
args[:title] ||= args[:link] if args[:link]
args[:error_message] = PrettyText.unescape_emoji(args[:error_message]) if args[:error_message]
template_name = is_fragment ? "preview_error_fragment_onebox" : "preview_error_onebox"
Mustache.render(template(template_name), args)
end
def self.preview_error_onebox_fragment(args)
preview_error_onebox(args, true)
end
def self.template(template_name)
@template_cache ||= {}
@template_cache[template_name] ||= begin
full_path = "#{Rails.root}/lib/onebox/templates/#{template_name}.mustache"
File.read(full_path)
end
end

View File

@ -3,11 +3,16 @@
require 'rails_helper'
describe Oneboxer do
def response(file)
file = File.join("spec", "fixtures", "onebox", "#{file}.response")
File.exists?(file) ? File.read(file) : ""
end
it "returns blank string for an invalid onebox" do
stub_request(:head, "http://boom.com")
stub_request(:get, "http://boom.com").to_return(body: "")
expect(Oneboxer.preview("http://boom.com")).to eq("")
expect(Oneboxer.preview("http://boom.com", invalidate_oneboxes: true)).to include("Sorry, we were unable to generate a preview for this web page")
expect(Oneboxer.onebox("http://boom.com")).to eq("")
end
@ -191,6 +196,7 @@ describe Oneboxer do
<head>
<meta property="og:title" content="Onebox1">
<meta property="og:description" content="this is bodycontent">
<meta property="og:image" content="https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg">
</head>
<body>
<p>body</p>
@ -210,6 +216,7 @@ describe Oneboxer do
# Disable all onebox iframes:
SiteSetting.allowed_onebox_iframes = ""
output = Oneboxer.onebox("https://www.youtube.com/watch?v=dQw4w9WgXcQ", invalidate_oneboxes: true)
expect(output).not_to include("<iframe") # Generic onebox
expect(output).to include("allowlistedgeneric")
@ -248,4 +255,49 @@ describe Oneboxer do
expect(Oneboxer.onebox("https://allowlist.ed/iframes", invalidate_oneboxes: true)).to match("iframe src")
end
context 'missing attributes' do
before do
stub_request(:head, url)
end
let(:url) { "https://example.com/fake-url/" }
it 'handles a missing description' do
stub_request(:get, url).to_return(body: response("missing_description"))
expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to include("could not be found: description")
end
it 'handles a missing description and image' do
stub_request(:get, url).to_return(body: response("missing_description_and_image"))
expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to include("could not be found: description, image")
end
it 'video with missing description returns a placeholder' do
stub_request(:get, url).to_return(body: response("video_missing_description"))
expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to include("onebox-placeholder-container")
end
end
context 'facebook_app_access_token' do
it 'providing a token should attempt to use new endpoint' do
url = "https://www.instagram.com/p/CHLkBERAiLa"
access_token = 'abc123'
SiteSetting.facebook_app_access_token = access_token
stub_request(:head, url)
stub_request(:get, "https://graph.facebook.com/v9.0/instagram_oembed?url=#{url}&access_token=#{access_token}").to_return(body: response("instagram_new"))
expect(Oneboxer.preview(url, invalidate_oneboxes: true)).not_to include('instagram-description')
end
it 'unconfigured token should attempt to use old endpoint' do
url = "https://www.instagram.com/p/CHLkBERAiLa"
stub_request(:head, url)
stub_request(:get, "https://api.instagram.com/oembed/?url=#{url}").to_return(body: response("instagram_old"))
expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to include('instagram-description')
end
end
end

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,64 @@
<!doctype html>
<html lang="en">
<head prefix="og: http://ogp.me/ns#">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>Open Graph protocol examples</title>
<meta property="og:type" content="website">
<meta property="og:locale" content="en_US">
<meta property="og:title" content="Open Graph protocol examples">
<meta property="og:site_name" content="Open Graph protocol examples">
<meta property="og:determiner" content="the">
<meta property="og:url" content="http://examples.opengraphprotocol.us/">
<meta property="og:image" content="http://examples.opengraphprotocol.us/media/images/logo.png">
<meta property="og:image:secure_url" content="https://d72cgtgi6hvvl.cloudfront.net/media/images/logo.png">
<meta property="og:image:type" content="image/png">
<meta property="og:image:width" content="300">
<meta property="og:image:height" content="300">
<link rel="author" href="http://www.niallkennedy.com/">
<link rel="icon shortcut" type="image/vnd.microsoft.icon" href="/favicon.ico" sizes="16x16">
<link rel="icon" type="image/png" href="/media/images/icon.png" sizes="16x16">
</head>
<body itemscope itemtype="http://schema.org/WebPage">
<header><h1 itemprop="name">Open Graph protocol examples</h1></header>
<div role="main">
<p itemprop="description">Examples of Open Graph protocol markup.</p>
<section id="standard">
<header><h2>Open Graph namespace</h2></header>
<ul>
<li><a href="/min.html">Missing required properties</a></li>
<li><a href="/required.html">Required properties</a></li>
<li><a href="/nomedia.html">Most properties, no media</a></li>
<li><a href="/canadian.html">Canadian (en_CA)</a></li>
<li>Images<ul>
<li><a href="/image.html">Structured image</a></li>
<li><a href="/image-url.html">Structured image w/ image:url</a></li>
<li><a href="/image-array.html">Structured image array</a></li>
<li><a href="/image-toosmall.html">One-pixel structured image</a></li></ul></li>
<li>Audio<ul>
<li><a href="/audio.html">Structured audio</a></li>
<li><a href="/audio-url.html">Structured audio w/ audio:url</a></li>
<li><a href="/audio-array.html">Structured audio array</a></li></ul></li>
<li>Video<ul>
<li><a href="/video.html">Structured video</a></li>
<li><a href="/video-array.html">Structured video array</a></li></ul></li>
</ul>
</section>
<section id="objects">
<header><h2>Global object namespace</h2></header>
<ul>
<li><a href="/article.html">Article</a></li>
<li><a href="/article-utc.html">Article w/ UTC datetime</a></li>
<li><a href="/article-offset.html">Article w/ UTC offset</a></li>
<li><a href="/book.html">Book</a></li>
<li><a href="/book-isbn10.html">Book w/ ISBN-10</a></li>
<li><a href="/profile.html">Profile</a></li>
<li><a href="/video-movie.html">Video movie</a></li>
</ul>
</section>
</div>
<footer style="text-align:center">by <a rel="author" href="http://www.niallkennedy.com/">Niall Kennedy</a></footer>
</body>
</html>

View File

@ -0,0 +1,59 @@
<!doctype html>
<html lang="en">
<head prefix="og: http://ogp.me/ns#">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>Open Graph protocol examples</title>
<meta property="og:type" content="website">
<meta property="og:locale" content="en_US">
<meta property="og:title" content="Open Graph protocol examples">
<meta property="og:site_name" content="Open Graph protocol examples">
<meta property="og:determiner" content="the">
<meta property="og:url" content="http://examples.opengraphprotocol.us/">
<link rel="author" href="http://www.niallkennedy.com/">
<link rel="icon shortcut" type="image/vnd.microsoft.icon" href="/favicon.ico" sizes="16x16">
<link rel="icon" type="image/png" href="/media/images/icon.png" sizes="16x16">
</head>
<body itemscope itemtype="http://schema.org/WebPage">
<header><h1 itemprop="name">Open Graph protocol examples</h1></header>
<div role="main">
<p itemprop="description">Examples of Open Graph protocol markup.</p>
<section id="standard">
<header><h2>Open Graph namespace</h2></header>
<ul>
<li><a href="/min.html">Missing required properties</a></li>
<li><a href="/required.html">Required properties</a></li>
<li><a href="/nomedia.html">Most properties, no media</a></li>
<li><a href="/canadian.html">Canadian (en_CA)</a></li>
<li>Images<ul>
<li><a href="/image.html">Structured image</a></li>
<li><a href="/image-url.html">Structured image w/ image:url</a></li>
<li><a href="/image-array.html">Structured image array</a></li>
<li><a href="/image-toosmall.html">One-pixel structured image</a></li></ul></li>
<li>Audio<ul>
<li><a href="/audio.html">Structured audio</a></li>
<li><a href="/audio-url.html">Structured audio w/ audio:url</a></li>
<li><a href="/audio-array.html">Structured audio array</a></li></ul></li>
<li>Video<ul>
<li><a href="/video.html">Structured video</a></li>
<li><a href="/video-array.html">Structured video array</a></li></ul></li>
</ul>
</section>
<section id="objects">
<header><h2>Global object namespace</h2></header>
<ul>
<li><a href="/article.html">Article</a></li>
<li><a href="/article-utc.html">Article w/ UTC datetime</a></li>
<li><a href="/article-offset.html">Article w/ UTC offset</a></li>
<li><a href="/book.html">Book</a></li>
<li><a href="/book-isbn10.html">Book w/ ISBN-10</a></li>
<li><a href="/profile.html">Profile</a></li>
<li><a href="/video-movie.html">Video movie</a></li>
</ul>
</section>
</div>
<footer style="text-align:center">by <a rel="author" href="http://www.niallkennedy.com/">Niall Kennedy</a></footer>
</body>
</html>

View File

@ -0,0 +1,30 @@
<!doctype html>
<html lang="en">
<head prefix="og: http://ogp.me/ns# video: http://ogp.me/ns/video#">
<meta charset="utf-8">
<title>Arrival of a Train at La Ciotat</title>
<meta property="og:title" content="Arrival of a Train at La Ciotat">
<meta property="og:site_name" content="Open Graph protocol examples">
<meta property="og:type" content="video.movie">
<meta property="og:locale" content="en_US">
<meta property="og:url" content="http://examples.opengraphprotocol.us/video-movie.html">
<meta property="og:image" content="http://examples.opengraphprotocol.us/media/images/train.jpg">
<meta property="og:image:secure_url" content="https://d72cgtgi6hvvl.cloudfront.net/media/images/train.jpg">
<meta property="og:image:width" content="500">
<meta property="og:image:height" content="328">
<meta property="og:image:type" content="image/jpeg">
<meta property="og:video" content="http://examples.opengraphprotocol.us/media/video/train.mp4">
<meta property="og:video:secure_url" content="https://d72cgtgi6hvvl.cloudfront.net/media/video/train.mp4">
<meta property="og:video:type" content="video/mp4">
<meta property="og:video:width" content="472">
<meta property="og:video:height" content="296">
<meta property="video:release_date" content="1895-12-28">
<meta property="video:director" content="http://examples.opengraphprotocol.us/profile.html">
<meta property="video:duration" content="50">
<meta property="video:tag" content="La Ciotat">
<meta property="video:tag" content="train">
</head>
<body>
<p>A <a href="http://ogp.me/#type_video.movie">video.movie</a> object.</p>
</body>
</html>

View File

@ -121,28 +121,17 @@ describe OneboxController do
stub_request(:get, url).to_return(body: response_body).then.to_raise
end
it "returns 404 if the onebox is nil" do
it "returns preview-error if the onebox is nil" do
stub_request_to_onebox_url(nil)
get "/onebox.json", params: { url: url, refresh: "true" }
expect(response.response_code).to eq(404)
expect(response.body).to include("Sorry, we were unable to generate a preview for this web page")
end
it "returns 404 if the onebox is an empty string" do
it "returns preview-error if the onebox is an empty string" do
stub_request_to_onebox_url(" \t ")
get "/onebox.json", params: { url: url, refresh: "true" }
expect(response.response_code).to eq(404)
end
it "cases missing onebox URLs so we do not attempt to preview again" do
stub_request_to_onebox_url(nil)
get "/onebox.json", params: { url: url, refresh: "true" }
expect(response.response_code).to eq(404)
Oneboxer.expects(:preview_onebox!).never
get "/onebox.json", params: { url: url, refresh: "true" }
expect(response.response_code).to eq(404)
expect(
Discourse.cache.read(Oneboxer.onebox_failed_cache_key(url))
).not_to eq(nil)
expect(response.response_code).to eq(200)
expect(response.body).to include("Sorry, we were unable to generate a preview for this web page")
end
end