discourse/spec/components/oneboxer_spec.rb
jbrw 68d0916eb5
FEATURE: Oneboxer cache response body (#12562)
* FEATURE: Cache successful HTTP GET requests during Oneboxing

Some oneboxes may fail when making excessive and/or odd requests against the target domains. This change provides a simple mechanism to cache the results of successful GET requests as part of the oneboxing process, with the goal of reducing repeated requests and ultimately improving the rate of successful oneboxing.

To enable:

Set `SiteSetting.cache_onebox_response_body` to `true`

Add the domains you’re interested in caching to `SiteSetting.cache_onebox_response_body_domains`, e.g. `example.com|example.org|example.net`

Optionally set `SiteSetting.cache_onebox_user_agent` to a user agent string of your choice to use when making requests against domains in the above list.

* FIX: Swap order of duration and value in redis call

The correct order for `setex` arguments is `key`, `duration`, and `value`.

Duration and value had been flipped, however the code would not have thrown an error because we were caching the value of `1.day.to_i` for a period of 1 second… The intention appears to be to set a value of 1 (purely as a flag) for a period of 1 day.
2021-03-31 13:19:34 -04:00

381 lines
16 KiB
Ruby

# frozen_string_literal: true
require 'rails_helper'
describe Oneboxer do
# Loads a canned response body from spec/fixtures/onebox (path is relative
# to the current working directory).
#
# @param file [String] fixture base name, without the ".response" extension
# @return [String] the fixture contents, or "" when no such fixture exists
def response(file)
  path = File.join("spec", "fixtures", "onebox", "#{file}.response")
  # File.exists? is deprecated and was removed in Ruby 3.2; File.exist? is
  # the supported spelling and behaves the same.
  File.exist?(path) ? File.read(path) : ""
end
# A page whose GET body is empty: the preview carries the "unable to
# generate a preview" notice and the onebox itself is an empty string.
it "returns blank string for an invalid onebox" do
  blank_page_url = "http://boom.com"
  stub_request(:head, blank_page_url)
  stub_request(:get, blank_page_url).to_return(body: "")

  preview_html = Oneboxer.preview(blank_page_url, invalidate_oneboxes: true)
  expect(preview_html).to include("Sorry, we were unable to generate a preview for this web page")

  expect(Oneboxer.onebox(blank_page_url)).to eq("")
end
# Oneboxer.invalidate must drop both cache entries kept for a URL: the
# rendered preview and the "failed" marker.
describe "#invalidate" do
  let(:url) { "http://test.com" }

  it "clears the cached preview for the onebox URL and the failed URL cache" do
    preview_key = Oneboxer.onebox_cache_key(url)
    failed_key = Oneboxer.onebox_failed_cache_key(url)
    Discourse.cache.write(preview_key, "test")
    Discourse.cache.write(failed_key, true)

    Oneboxer.invalidate(url)

    expect(Discourse.cache.read(preview_key)).to eq(nil)
    expect(Discourse.cache.read(failed_key)).to eq(nil)
  end
end
context "local oneboxes" do
# Builds an anchor tag whose href and text are both the given site-relative
# path prefixed with Discourse.base_url. The specs in this context use it as
# the expected markup when a preview is a plain link rather than a onebox.
def link(url)
  absolute = [Discourse.base_url, url].join
  format('<a href="%<href>s">%<href>s</a>', href: absolute)
end
# Requests a onebox preview for a site-relative path and returns it as a String.
# user, category and topic are optional; only their ids (or nil) are forwarded
# to Oneboxer.preview as user_id, category_id and topic_id.
def preview(url, user = nil, category = nil, topic = nil)
  absolute_url = "#{Discourse.base_url}#{url}"
  context_ids = {
    user_id: user&.id,
    category_id: category&.id,
    topic_id: topic&.id,
  }
  Oneboxer.preview(absolute_url, **context_ids).to_s
end
# Builds a public topic and a topic inside a category whose permissions are
# { staff: :full }, then previews their URLs under different combinations of
# viewing user, category context and topic context. Previews are expected to
# be either a rendered onebox or the plain anchor produced by link().
it "links to a topic/post" do
# A user added to the staff group, and a category restricted via permissions.
staff = Fabricate(:user)
Group[:staff].add(staff)
secured_category = Fabricate(:category)
secured_category.permissions = { staff: :full }
secured_category.save!
# Public topic: first post (with an emoji), a reply, a hidden post and a
# moderator-action post written by the staff user.
replier = Fabricate(:user)
public_post = Fabricate(:post, raw: "This post has an emoji :+1:")
public_topic = public_post.topic
public_reply = Fabricate(:post, topic: public_topic, post_number: 2, user: replier)
public_hidden = Fabricate(:post, topic: public_topic, post_number: 3, hidden: true)
public_moderator_action = Fabricate(:post, topic: public_topic, post_number: 4, user: staff, post_type: Post.types[:moderator_action])
user = public_post.user
public_category = public_topic.category
# Topic in secured_category with two posts by the staff user.
secured_topic = Fabricate(:topic, user: staff, category: secured_category)
secured_post = Fabricate(:post, user: staff, topic: secured_topic)
secured_reply = Fabricate(:post, user: staff, topic: secured_topic, post_number: 2)
# Topic URL and first-post URL: the onebox contains the topic title, and the
# first post's emoji shows up as an /images/emoji/ path.
expect(preview(public_topic.relative_url, user, public_category)).to include(public_topic.title)
onebox = preview(public_post.url, user, public_category)
expect(onebox).to include(public_topic.title)
expect(onebox).to include("/images/emoji/")
# Reply URL: excerpt, post number and the replier's tiny avatar.
onebox = preview(public_reply.url, user, public_category)
expect(onebox).to include(public_reply.excerpt)
expect(onebox).to include(%{data-post="2"})
expect(onebox).to include(PrettyText.avatar_img(replier.avatar_template, "tiny"))
# Short /t/:id URL form.
short_url = "#{Discourse.base_path}/t/#{public_topic.id}"
expect(preview(short_url, user, public_category)).to include(public_topic.title)
# Moderator-action post: excerpt, post number and the staff user's tiny avatar.
onebox = preview(public_moderator_action.url, user, public_category)
expect(onebox).to include(public_moderator_action.excerpt)
expect(onebox).to include(%{data-post="4"})
expect(onebox).to include(PrettyText.avatar_img(staff.avatar_template, "tiny"))
# With the reply's own topic passed as context, the title is omitted and the
# avatar template is rendered at size 40.
onebox = preview(public_reply.url, user, public_category, public_topic)
expect(onebox).not_to include(public_topic.title)
expect(onebox).to include(replier.avatar_template.sub("{size}", "40"))
# Hidden post: plain link.
expect(preview(public_hidden.url, user, public_category)).to match_html(link(public_hidden.url))
# As `user` with the public category as context, secured topic/post URLs are plain links.
expect(preview(secured_topic.relative_url, user, public_category)).to match_html(link(secured_topic.relative_url))
expect(preview(secured_post.url, user, public_category)).to match_html(link(secured_post.url))
expect(preview(secured_reply.url, user, public_category)).to match_html(link(secured_reply.url))
# As `user` with secured_category as context, public and secured URLs alike are plain links.
expect(preview(public_topic.relative_url, user, secured_category)).to match_html(link(public_topic.relative_url))
expect(preview(public_reply.url, user, secured_category)).to match_html(link(public_reply.url))
expect(preview(secured_post.url, user, secured_category)).to match_html(link(secured_post.url))
expect(preview(secured_reply.url, user, secured_category)).to match_html(link(secured_reply.url))
# As `staff` with secured_category as context, public and secured content is
# oneboxed; the hidden post is still a plain link.
expect(preview(public_topic.relative_url, staff, secured_category)).to include(public_topic.title)
expect(preview(public_post.url, staff, secured_category)).to include(public_topic.title)
expect(preview(public_reply.url, staff, secured_category)).to include(public_reply.excerpt)
expect(preview(public_hidden.url, staff, secured_category)).to match_html(link(public_hidden.url))
expect(preview(secured_topic.relative_url, staff, secured_category)).to include(secured_topic.title)
expect(preview(secured_post.url, staff, secured_category)).to include(secured_topic.title)
expect(preview(secured_reply.url, staff, secured_category)).to include(secured_reply.excerpt)
# Same-topic context for the secured reply: the topic title is omitted.
expect(preview(secured_reply.url, staff, secured_category, secured_topic)).not_to include(secured_topic.title)
end
# An unknown username previews as a plain link; an existing user's preview
# includes that user's name.
it "links to an user profile" do
  existing_user = Fabricate(:user)
  missing_profile_path = "/u/does-not-exist"

  expect(preview(missing_profile_path)).to match_html(link(missing_profile_path))
  expect(preview("/u/#{existing_user.username}")).to include(existing_user.name)
end
# The user's name appears in the profile preview only while
# SiteSetting.enable_names is true.
it "should respect enable_names site setting" do
  member = Fabricate(:user)
  profile_path = "/u/#{member.username}"

  SiteSetting.enable_names = true
  expect(preview(profile_path)).to include(member.name)

  SiteSetting.enable_names = false
  expect(preview(profile_path)).not_to include(member.name)
end
# Upload URLs: a .pdf previews as a plain link, while .MP3 and .mov previews
# contain <audio> and <video> tags respectively.
it "links to an upload" do
  upload_path = "/uploads/default/original/3X/e/8/e8fcfa624e4fb6623eea57f54941a58ba797f14d"
  pdf_path = "#{upload_path}.pdf"

  expect(preview(pdf_path)).to match_html(link(pdf_path))
  expect(preview("#{upload_path}.MP3")).to include("<audio ")
  expect(preview("#{upload_path}.mov")).to include("<video ")
end
# The location span is absent before a location is set. Once set, the span is
# present but an <img> tag supplied as the location must not be in the
# preview; a plain-text location is shown.
it "strips HTML from user profile location" do
  member = Fabricate(:user)
  user_profile = member.reload.user_profile
  profile_path = "/u/#{member.username}"
  location_span = "<span class=\"location\">"

  expect(preview(profile_path)).not_to include(location_span)

  user_profile.update!(location: "<img src=x onerror=alert(document.domain)>")
  expect(preview(profile_path)).to include(location_span)
  expect(preview(profile_path)).not_to include("<img src=x")

  user_profile.update!(location: "Thunderland")
  expect(preview(profile_path)).to include("Thunderland")
end
end
# A local URL containing a single quote must come out of post processing
# with the quote percent-encoded (%27) in both the href and the link text.
context ".onebox_raw" do
  it "should escape the onebox URL before processing" do
    post = Fabricate(:post, raw: "#{Discourse.base_url}/new?'class=black")
    cpp = CookedPostProcessor.new(post, invalidate_oneboxes: true)
    cpp.post_process_oneboxes

    # Both the href and the visible text derive from the raw URL, so both are
    # built from Discourse.base_url instead of hard-coding the test host in one of them.
    escaped_url = "#{Discourse.base_url}/new?%27class=black"
    expect(cpp.html).to eq("<p><a href=\"#{escaped_url}\">#{escaped_url}</a></p>")
  end
end
# With "git.*.com|bitbucket.com" in blocked_onebox_domains, neither the
# GitHub URL itself nor a URL whose HEAD response is a 302 to it yields a onebox.
it "does not crawl blocklisted URLs" do
  SiteSetting.blocked_onebox_domains = "git.*.com|bitbucket.com"
  blocked_url = 'https://github.com/discourse/discourse/commit/21b562852885f883be43032e03c709241e8e6d4f'
  redirecting_url = 'https://discourse.org/'
  stub_request(:head, redirecting_url).to_return(status: 302, body: "", headers: { location: blocked_url })

  [blocked_url, redirecting_url].each do |candidate|
    expect(Oneboxer.external_onebox(candidate)[:onebox]).to be_empty
  end
end
# A store.steampowered.com URL answering 200 with an empty body to both HEAD
# and GET must still produce a non-blank onebox.
it "does not consider ignore_redirects domains as blocklisted" do
  steam_url = 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/'
  %i[head get].each do |verb|
    stub_request(verb, steam_url).to_return(status: 200, body: "", headers: {})
  end

  onebox_html = Oneboxer.external_onebox(steam_url)[:onebox]
  expect(onebox_html).to be_present
end
# The URL answers 403 by default and 200 only when the request carries
# Onebox.options.user_agent; a present preview therefore shows that the
# custom user agent was sent for a host listed in force_custom_user_agent_hosts.
it "uses the Onebox custom user agent on specified hosts" do
  SiteSetting.force_custom_user_agent_hosts = "http://codepen.io|https://video.discourse.org/"
  video_url = 'https://video.discourse.org/presentation.mp4'
  verbs = %i[head get]

  # Generic stubs are declared first, then the user-agent-specific ones,
  # keeping the original declaration order.
  verbs.each do |verb|
    stub_request(verb, video_url).to_return(status: 403, body: "", headers: {})
  end
  verbs.each do |verb|
    stub_request(verb, video_url)
      .with(headers: { "User-Agent" => Onebox.options.user_agent })
      .to_return(status: 200, body: "", headers: {})
  end

  expect(Oneboxer.preview(video_url, invalidate_oneboxes: true)).to be_present
end
# Oneboxes a stubbed YouTube watch URL under different values of
# SiteSetting.allowed_onebox_iframes and checks whether the output contains an iframe.
context "with youtube stub" do
  let(:video_url) { "https://www.youtube.com/watch?v=dQw4w9WgXcQ" }

  # Minimal page with Open Graph title, description and image tags.
  # The document is closed with </html> (it previously ended with a second opening <html> tag).
  let(:html) do
    <<~HTML
      <html>
      <head>
      <meta property="og:title" content="Onebox1">
      <meta property="og:description" content="this is bodycontent">
      <meta property="og:image" content="https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg">
      </head>
      <body>
      <p>body</p>
      </body>
      </html>
    HTML
  end

  before do
    stub_request(:any, video_url).to_return(status: 200, body: html)
  end

  it "allows restricting engines based on the allowed_onebox_iframes setting" do
    output = Oneboxer.onebox(video_url, invalidate_oneboxes: true)
    expect(output).to include("<iframe") # Regular youtube onebox

    # Disable all onebox iframes:
    SiteSetting.allowed_onebox_iframes = ""
    output = Oneboxer.onebox(video_url, invalidate_oneboxes: true)
    expect(output).not_to include("<iframe") # Generic onebox
    expect(output).to include("allowlistedgeneric")

    # Just enable youtube:
    SiteSetting.allowed_onebox_iframes = "https://www.youtube.com"
    output = Oneboxer.onebox(video_url, invalidate_oneboxes: true)
    expect(output).to include("<iframe") # Regular youtube onebox
  end
end
# Two generic pages advertise oEmbed endpoints that return "rich" iframe HTML.
# With allowed_iframes set to "discourse.org|https://ifram.es", the iframe
# served from https://ifram.es is kept, while the one whose src is
# https://malicious/discourse.org/ results in an empty onebox.
it "allows iframes from generic sites via the allowed_iframes setting" do
  allowed_page = '<html><head><link rel="alternate" type="application/json+oembed" href="https://allowlist.ed/iframes.json" />'
  blocked_page = '<html><head><link rel="alternate" type="application/json+oembed" href="https://blocklist.ed/iframes.json" />'

  allowed_oembed = { type: "rich", height: "100", html: "<iframe src='https://ifram.es/foo/bar'></iframe>" }
  blocked_oembed = { type: "rich", height: "100", html: "<iframe src='https://malicious/discourse.org/'></iframe>" }

  # Insertion order matches the original stub declaration order.
  {
    "https://blocklist.ed/iframes" => blocked_page,
    "https://blocklist.ed/iframes.json" => blocked_oembed.to_json,
    "https://allowlist.ed/iframes" => allowed_page,
    "https://allowlist.ed/iframes.json" => allowed_oembed.to_json,
  }.each do |stubbed_url, stubbed_body|
    stub_request(:any, stubbed_url).to_return(status: 200, body: stubbed_body)
  end

  SiteSetting.allowed_iframes = "discourse.org|https://ifram.es"

  blocked_result = Oneboxer.onebox("https://blocklist.ed/iframes", invalidate_oneboxes: true)
  expect(blocked_result).to be_empty

  allowed_result = Oneboxer.onebox("https://allowlist.ed/iframes", invalidate_oneboxes: true)
  expect(allowed_result).to match("iframe src")
end
# Previews of pages served from fixtures that lack some attributes, checking
# which "could not be found" messages (if any) the preview contains.
context 'missing attributes' do
  let(:url) { "https://example.com/fake-url/" }

  before { stub_request(:head, url) }

  # Serves the named fixture as the GET body for url and returns the preview.
  def preview_with_fixture(fixture_name)
    stub_request(:get, url).to_return(body: response(fixture_name))
    Oneboxer.preview(url, invalidate_oneboxes: true)
  end

  it 'handles a missing description' do
    expect(preview_with_fixture("missing_description")).to include("could not be found: description")
  end

  it 'handles a missing description and image' do
    expect(preview_with_fixture("missing_description_and_image")).to include("could not be found: description, image")
  end

  it 'handles a missing image' do
    # Note: If the only error is a missing image, we shouldn't return an error
    expect(preview_with_fixture("missing_image")).not_to include("could not be found")
  end

  it 'video with missing description returns a placeholder' do
    expect(preview_with_fixture("video_missing_description")).to include("onebox-placeholder-container")
  end
end
# Instagram oneboxing: with facebook_app_access_token set, only the
# graph.facebook.com oEmbed endpoint is stubbed; without it, only the
# api.instagram.com endpoint is stubbed.
context 'facebook_app_access_token' do
  let(:instagram_url) { "https://www.instagram.com/p/CHLkBERAiLa" }

  before { stub_request(:head, instagram_url) }

  it 'providing a token should attempt to use new endpoint' do
    token = 'abc123'
    SiteSetting.facebook_app_access_token = token
    graph_endpoint = "https://graph.facebook.com/v9.0/instagram_oembed?url=#{instagram_url}&access_token=#{token}"
    stub_request(:get, graph_endpoint).to_return(body: response("instagram_new"))

    rendered = Oneboxer.preview(instagram_url, invalidate_oneboxes: true)
    expect(rendered).not_to include('instagram-description')
  end

  it 'unconfigured token should attempt to use old endpoint' do
    legacy_endpoint = "https://api.instagram.com/oembed/?url=#{instagram_url}"
    stub_request(:get, legacy_endpoint).to_return(body: response("instagram_old"))

    rendered = Oneboxer.preview(instagram_url, invalidate_oneboxes: true)
    expect(rendered).to include('instagram-description')
  end
end
# Oneboxer.apply with a block that always returns '<div>onebox</div>': each
# URL line in the cooked post is replaced by that div, between the
# surrounding paragraphs.
describe '#apply' do
  it 'generates valid HTML' do
    # Cooks raw, applies the stub onebox and re-serialises via an HTML5 fragment.
    render = lambda do |raw|
      cooked = Oneboxer.apply(PrettyText.cook(raw)) { '<div>onebox</div>' }
      Nokogiri::HTML5.fragment(cooked.to_html).to_html
    end

    single = render.call("Before Onebox\nhttps://example.com\nAfter Onebox")
    expect(single).to match_html <<~HTML
      <p>Before Onebox</p>
      <div>onebox</div>
      <p>After Onebox</p>
    HTML

    double = render.call("Before Onebox\nhttps://example.com\nhttps://example.com\nAfter Onebox")
    expect(double).to match_html <<~HTML
      <p>Before Onebox</p>
      <div>onebox</div>
      <div>onebox</div>
      <p>After Onebox</p>
    HTML
  end
end
# Oneboxer.force_get_hosts is expected to contain an Amazon host as well as a
# www-prefixed https URL for a domain listed in cache_onebox_response_body_domains.
describe '#force_get_hosts' do
  before { SiteSetting.cache_onebox_response_body_domains = "example.net|example.com|example.org" }

  let(:hosts) { Oneboxer.force_get_hosts }

  it "includes Amazon sites" do
    expect(hosts).to include('https://www.amazon.ca')
  end

  it "includes cache_onebox_response_body_domains" do
    expect(hosts).to include('https://www.example.com')
  end
end
# With cache_onebox_response_body enabled, previewing a URL caches the GET
# response body only when the URL's domain is listed in
# cache_onebox_response_body_domains.
describe 'cache_onebox_response_body' do
  # Response body served for both URLs. The document is closed with </html>
  # (it previously ended with a second opening <html> tag).
  let(:html) do
    <<~HTML
      <html>
      <body>
      <p>cache me if you can</p>
      </body>
      </html>
    HTML
  end

  # www.example.com: example.com is in the configured domain list.
  let(:url) { "https://www.example.com/my/great/content" }
  # www.example2.com: example2.com is not in the configured domain list.
  let(:url2) { "https://www.example2.com/my/great/content" }

  before do
    stub_request(:any, url).to_return(status: 200, body: html)
    stub_request(:any, url2).to_return(status: 200, body: html)
    SiteSetting.cache_onebox_response_body = true
    SiteSetting.cache_onebox_response_body_domains = "example.net|example.com|example.org"
  end

  it "caches when domain matches" do
    # Called for its caching side effect; the returned preview is not inspected.
    Oneboxer.preview(url, invalidate_oneboxes: true)

    expect(Oneboxer.cached_response_body_exists?(url)).to eq(true)
    expect(Oneboxer.fetch_cached_response_body(url)).to eq(html)
  end

  it "ignores cache when domain not present" do
    # Called for its side effect; the returned preview is not inspected.
    Oneboxer.preview(url2, invalidate_oneboxes: true)

    expect(Oneboxer.cached_response_body_exists?(url2)).to eq(false)
  end
end
end