FEATURE: Oneboxer cache response body (#12562)
* FEATURE: Cache successful HTTP GET requests during Oneboxing Some oneboxes may fail if when making excessive and/or odd requests against the target domains. This change provides a simple mechanism to cache the results of succesful GET requests as part of the oneboxing process, with the goal of reducing repeated requests and ultimately improving the rate of successful oneboxing. To enable: Set `SiteSetting.cache_onebox_response_body` to `true` Add the domains you’re interesting in caching to `SiteSetting. cache_onebox_response_body_domains` e.g. `example.com|example.org|example.net` Optionally set `SiteSetting.cache_onebox_user_agent` to a user agent string of your choice to use when making requests against domains in the above list. * FIX: Swap order of duration and value in redis call The correct order for `setex` arguments is `key`, `duration`, and `value`. Duration and value had been flipped, however the code would not have thrown an error because we were caching the value of `1.day.to_i` for a period of 1 seconds… The intention appears to be to set a value of 1 (purely as a flag) for a period of 1 day.
This commit is contained in:
parent
e704f0a541
commit
68d0916eb5
|
@ -1638,6 +1638,16 @@ onebox:
|
|||
facebook_app_access_token:
|
||||
default: ""
|
||||
secret: true
|
||||
cache_onebox_response_body:
|
||||
default: false
|
||||
hidden: true
|
||||
cache_onebox_response_body_domains:
|
||||
default: ""
|
||||
type: list
|
||||
hidden: true
|
||||
cache_onebox_user_agent:
|
||||
default: ""
|
||||
hidden: true
|
||||
spam:
|
||||
add_rel_nofollow_to_user_content: true
|
||||
hide_post_sensitivity:
|
||||
|
|
|
@ -16,7 +16,7 @@ class FinalDestination
|
|||
|
||||
def self.cache_https_domain(domain)
|
||||
key = redis_https_key(domain)
|
||||
Discourse.redis.without_namespace.setex(key, "1", 1.day.to_i).present?
|
||||
Discourse.redis.without_namespace.setex(key, 1.day.to_i, "1")
|
||||
end
|
||||
|
||||
def self.is_https_domain?(domain)
|
||||
|
@ -28,6 +28,8 @@ class FinalDestination
|
|||
"HTTPS_DOMAIN_#{domain}"
|
||||
end
|
||||
|
||||
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15"
|
||||
|
||||
attr_reader :status, :cookie, :status_code, :ignored
|
||||
|
||||
def initialize(url, opts = nil)
|
||||
|
@ -38,6 +40,7 @@ class FinalDestination
|
|||
@force_get_hosts = @opts[:force_get_hosts] || []
|
||||
@preserve_fragment_url_hosts = @opts[:preserve_fragment_url_hosts] || []
|
||||
@force_custom_user_agent_hosts = @opts[:force_custom_user_agent_hosts] || []
|
||||
@default_user_agent = @opts[:default_user_agent] || DEFAULT_USER_AGENT
|
||||
@opts[:max_redirects] ||= 5
|
||||
@opts[:lookup_ip] ||= lambda { |host| FinalDestination.lookup_ip(host) }
|
||||
|
||||
|
@ -67,7 +70,7 @@ class FinalDestination
|
|||
@timeout = @opts[:timeout] || nil
|
||||
@preserve_fragment_url = @preserve_fragment_url_hosts.any? { |host| hostname_matches?(host) }
|
||||
@validate_uri = @opts.fetch(:validate_uri) { true }
|
||||
@user_agent = @force_custom_user_agent_hosts.any? { |host| hostname_matches?(host) } ? Onebox.options.user_agent : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15"
|
||||
@user_agent = @force_custom_user_agent_hosts.any? { |host| hostname_matches?(host) } ? Onebox.options.user_agent : @default_user_agent
|
||||
end
|
||||
|
||||
def self.connection_timeout
|
||||
|
@ -182,20 +185,35 @@ class FinalDestination
|
|||
end
|
||||
end
|
||||
|
||||
if Oneboxer.cached_response_body_exists?(@uri.to_s)
|
||||
@status = :resolved
|
||||
return @uri
|
||||
end
|
||||
|
||||
headers = request_headers
|
||||
middlewares = Excon.defaults[:middlewares]
|
||||
middlewares << Excon::Middleware::Decompress if @http_verb == :get
|
||||
|
||||
response = Excon.public_send(@http_verb,
|
||||
@uri.to_s,
|
||||
read_timeout: timeout,
|
||||
headers: headers
|
||||
headers: headers,
|
||||
middlewares: middlewares
|
||||
)
|
||||
|
||||
location = nil
|
||||
response_headers = nil
|
||||
|
||||
response_status = response.status.to_i
|
||||
|
||||
case response.status
|
||||
when 200
|
||||
# Cache body of successful `get` requests
|
||||
if @http_verb == :get
|
||||
if Oneboxer.cache_response_body?(@uri)
|
||||
Oneboxer.cache_response_body(@uri.to_s, response.body)
|
||||
end
|
||||
end
|
||||
|
||||
@status = :resolved
|
||||
return @uri
|
||||
when 400, 405, 406, 409, 501
|
||||
|
|
|
@ -26,13 +26,17 @@ module Oneboxer
|
|||
@ignore_redirects ||= ['http://www.dropbox.com', 'http://store.steampowered.com', 'http://vimeo.com', Discourse.base_url]
|
||||
end
|
||||
|
||||
def self.force_get_hosts
|
||||
@force_get_hosts ||= begin
|
||||
hosts = ['http://us.battle.net', 'https://news.yahoo.com']
|
||||
def self.amazon_domains
|
||||
amazon_suffixes = %w(com com.br ca cn fr de in it co.jp com.mx nl pl sa sg es se com.tr ae co.uk)
|
||||
|
||||
hosts + amazon_suffixes.collect { |suffix| "https://www.amazon.#{suffix}" }
|
||||
amazon_suffixes.collect { |suffix| "https://www.amazon.#{suffix}" }
|
||||
end
|
||||
|
||||
def self.force_get_hosts
|
||||
hosts = ['http://us.battle.net', 'https://news.yahoo.com']
|
||||
hosts += SiteSetting.cache_onebox_response_body_domains.split('|').collect { |domain| "https://www.#{domain}" }
|
||||
hosts += amazon_domains
|
||||
|
||||
hosts.uniq
|
||||
end
|
||||
|
||||
def self.force_custom_user_agent_hosts
|
||||
|
@ -80,6 +84,33 @@ module Oneboxer
|
|||
Discourse.cache.delete(onebox_failed_cache_key(url))
|
||||
end
|
||||
|
||||
def self.cache_response_body?(uri)
|
||||
uri = URI.parse(uri) if uri.is_a?(String)
|
||||
|
||||
if SiteSetting.cache_onebox_response_body?
|
||||
SiteSetting.cache_onebox_response_body_domains.split("|").any? { |domain| uri.hostname.ends_with?(domain) }
|
||||
end
|
||||
end
|
||||
|
||||
def self.cache_response_body(uri, response)
|
||||
key = redis_cached_response_body_key(uri)
|
||||
Discourse.redis.without_namespace.setex(key, 1.minutes.to_i, response)
|
||||
end
|
||||
|
||||
def self.cached_response_body_exists?(uri)
|
||||
key = redis_cached_response_body_key(uri)
|
||||
Discourse.redis.without_namespace.exists(key).to_i > 0
|
||||
end
|
||||
|
||||
def self.fetch_cached_response_body(uri)
|
||||
key = redis_cached_response_body_key(uri)
|
||||
Discourse.redis.without_namespace.get(key)
|
||||
end
|
||||
|
||||
def self.redis_cached_response_body_key(uri)
|
||||
"CACHED_RESPONSE_#{uri}"
|
||||
end
|
||||
|
||||
# Parse URLs out of HTML, returning the document when finished.
|
||||
def self.each_onebox_link(string_or_doc, extra_paths: [])
|
||||
doc = string_or_doc
|
||||
|
@ -368,12 +399,18 @@ module Oneboxer
|
|||
|
||||
def self.external_onebox(url)
|
||||
Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
|
||||
fd = FinalDestination.new(url,
|
||||
fd_options = {
|
||||
ignore_redirects: ignore_redirects,
|
||||
ignore_hostnames: blocked_domains,
|
||||
force_get_hosts: force_get_hosts,
|
||||
force_custom_user_agent_hosts: force_custom_user_agent_hosts,
|
||||
preserve_fragment_url_hosts: preserve_fragment_url_hosts)
|
||||
preserve_fragment_url_hosts: preserve_fragment_url_hosts
|
||||
}
|
||||
|
||||
user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present?
|
||||
fd_options[:default_user_agent] = user_agent_override if user_agent_override
|
||||
|
||||
fd = FinalDestination.new(url, fd_options)
|
||||
uri = fd.resolve
|
||||
|
||||
if fd.status != :resolved
|
||||
|
@ -389,20 +426,22 @@ module Oneboxer
|
|||
return error_box
|
||||
end
|
||||
|
||||
return blank_onebox if uri.blank? || blocked_domains.map { |hostname| uri.hostname.match?(hostname) }.any?
|
||||
return blank_onebox if uri.blank? || blocked_domains.any? { |hostname| uri.hostname.match?(hostname) }
|
||||
|
||||
options = {
|
||||
onebox_options = {
|
||||
max_width: 695,
|
||||
sanitize_config: Onebox::DiscourseOneboxSanitizeConfig::Config::DISCOURSE_ONEBOX,
|
||||
allowed_iframe_origins: allowed_iframe_origins,
|
||||
hostname: GlobalSetting.hostname,
|
||||
facebook_app_access_token: SiteSetting.facebook_app_access_token,
|
||||
disable_media_download_controls: SiteSetting.disable_onebox_media_download_controls
|
||||
disable_media_download_controls: SiteSetting.disable_onebox_media_download_controls,
|
||||
body_cacher: self
|
||||
}
|
||||
|
||||
options[:cookie] = fd.cookie if fd.cookie
|
||||
onebox_options[:cookie] = fd.cookie if fd.cookie
|
||||
onebox_options[:user_agent] = user_agent_override if user_agent_override
|
||||
|
||||
r = Onebox.preview(uri.to_s, options)
|
||||
r = Onebox.preview(uri.to_s, onebox_options)
|
||||
result = { onebox: r.to_s, preview: r&.placeholder_html.to_s }
|
||||
|
||||
# NOTE: Call r.errors after calling placeholder_html
|
||||
|
|
|
@ -330,9 +330,51 @@ describe Oneboxer do
|
|||
end
|
||||
|
||||
describe '#force_get_hosts' do
|
||||
before do
|
||||
SiteSetting.cache_onebox_response_body_domains = "example.net|example.com|example.org"
|
||||
end
|
||||
|
||||
it "includes Amazon sites" do
|
||||
expect(Oneboxer.force_get_hosts).to include('https://www.amazon.ca')
|
||||
end
|
||||
|
||||
it "includes cache_onebox_response_body_domains" do
|
||||
expect(Oneboxer.force_get_hosts).to include('https://www.example.com')
|
||||
end
|
||||
end
|
||||
|
||||
describe 'cache_onebox_response_body' do
|
||||
let(:html) do
|
||||
<<~HTML
|
||||
<html>
|
||||
<body>
|
||||
<p>cache me if you can</p>
|
||||
</body>
|
||||
<html>
|
||||
HTML
|
||||
end
|
||||
|
||||
let(:url) { "https://www.example.com/my/great/content" }
|
||||
let(:url2) { "https://www.example2.com/my/great/content" }
|
||||
|
||||
before do
|
||||
stub_request(:any, url).to_return(status: 200, body: html)
|
||||
stub_request(:any, url2).to_return(status: 200, body: html)
|
||||
|
||||
SiteSetting.cache_onebox_response_body = true
|
||||
SiteSetting.cache_onebox_response_body_domains = "example.net|example.com|example.org"
|
||||
end
|
||||
|
||||
it "caches when domain matches" do
|
||||
preview = Oneboxer.preview(url, invalidate_oneboxes: true)
|
||||
expect(Oneboxer.cached_response_body_exists?(url)).to eq(true)
|
||||
expect(Oneboxer.fetch_cached_response_body(url)).to eq(html)
|
||||
end
|
||||
|
||||
it "ignores cache when domain not present" do
|
||||
preview = Oneboxer.preview(url2, invalidate_oneboxes: true)
|
||||
expect(Oneboxer.cached_response_body_exists?(url2)).to eq(false)
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue