FIX: Ignore OneBox blacklisted domains.

This commit is contained in:
Bianca Nenciu 2018-08-26 16:31:02 +02:00
parent b12cf08c57
commit b6963b8ffb
4 changed files with 38 additions and 11 deletions

View File

@ -26,7 +26,7 @@ class FinalDestination
"HTTPS_DOMAIN_#{domain}" "HTTPS_DOMAIN_#{domain}"
end end
attr_reader :status, :cookie, :status_code attr_reader :status, :cookie, :status_code, :ignored
def initialize(url, opts = nil) def initialize(url, opts = nil)
@url = url @url = url
@ -36,7 +36,15 @@ class FinalDestination
@force_get_hosts = @opts[:force_get_hosts] || [] @force_get_hosts = @opts[:force_get_hosts] || []
@opts[:max_redirects] ||= 5 @opts[:max_redirects] ||= 5
@opts[:lookup_ip] ||= lambda { |host| FinalDestination.lookup_ip(host) } @opts[:lookup_ip] ||= lambda { |host| FinalDestination.lookup_ip(host) }
@ignored = [Discourse.base_url_no_prefix] + (@opts[:ignore_redirects] || [])
@ignored = @opts[:ignore_hostnames] || []
[Discourse.base_url_no_prefix].concat(@opts[:ignore_redirects] || []).each do |url|
url = uri(url)
if url.present? && url.hostname
@ignored << url.hostname
end
end
@limit = @opts[:max_redirects] @limit = @opts[:max_redirects]
@status = :ready @status = :ready
@http_verb = @force_get_hosts.any? { |host| hostname_matches?(host) } ? :get : :head @http_verb = @force_get_hosts.any? { |host| hostname_matches?(host) } ? :get : :head
@ -131,18 +139,18 @@ class FinalDestination
return nil return nil
end end
@ignored.each do |host|
if hostname_matches?(host)
@status = :resolved
return @uri
end
end
unless validate_uri unless validate_uri
log(:warn, "FinalDestination could not resolve URL (invalid URI): #{@uri}") if @verbose log(:warn, "FinalDestination could not resolve URL (invalid URI): #{@uri}") if @verbose
return nil return nil
end end
@ignored.each do |host|
if @uri&.hostname&.match?(host)
@status = :resolved
return @uri
end
end
headers = request_headers headers = request_headers
response = Excon.public_send(@http_verb, response = Excon.public_send(@http_verb,
@uri.to_s, @uri.to_s,

View File

@ -250,9 +250,11 @@ module Oneboxer
def self.external_onebox(url) def self.external_onebox(url)
Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
fd = FinalDestination.new(url, ignore_redirects: ignore_redirects, force_get_hosts: force_get_hosts) ignored = SiteSetting.onebox_domains_blacklist.split("|")
fd = FinalDestination.new(url, ignore_redirects: ignore_redirects, ignore_hostnames: ignored, force_get_hosts: force_get_hosts)
uri = fd.resolve uri = fd.resolve
return blank_onebox if uri.blank? || SiteSetting.onebox_domains_blacklist.include?(uri.hostname) return blank_onebox if uri.blank? || ignored.map { |hostname| uri.hostname.match?(hostname) }.any?
options = { options = {
cache: {}, cache: {},

View File

@ -47,6 +47,14 @@ describe FinalDestination do
FinalDestination.new(url, opts) FinalDestination.new(url, opts)
end end
# Checks how the :ignore_redirects option is reduced to the list exposed by
# FinalDestination#ignored: of the four entries below only 'google.com' and
# 'meta.discourse.org' are expected to survive as hostnames, while the
# scheme-less 'youtube.com' and the malformed '://bing.com' are expected to
# be dropped.
# NOTE(review): 'test.localhost' presumably comes from
# Discourse.base_url_no_prefix in the test environment -- confirm.
it 'correctly parses ignored hostnames' do
  redirects = ['http://google.com', 'youtube.com', 'https://meta.discourse.org', '://bing.com']
  destination = FinalDestination.new('https://meta.discourse.org', ignore_redirects: redirects)

  expect(destination.ignored).to eq(['test.localhost', 'google.com', 'meta.discourse.org'])
end
describe '.resolve' do describe '.resolve' do
it "has a ready status code before anything happens" do it "has a ready status code before anything happens" do

View File

@ -107,4 +107,13 @@ describe Oneboxer do
end end
end end
# With the onebox blacklist set to "git.*.com|bitbucket.com", a github.com
# URL is expected to yield an empty onebox both when requested directly and
# when it is only reached through a 302 redirect from another host.
it "does not crawl blacklisted URLs" do
  SiteSetting.onebox_domains_blacklist = "git.*.com|bitbucket.com"

  blacklisted_url = 'https://github.com/discourse/discourse/commit/21b562852885f883be43032e03c709241e8e6d4f'
  redirecting_url = 'https://discourse.org/'

  # The HEAD request to the redirecting host answers with a redirect that
  # points at the blacklisted URL.
  stub_request(:head, redirecting_url)
    .to_return(status: 302, body: "", headers: { location: blacklisted_url })

  # Direct hit first, then the redirected one (same order as before).
  [blacklisted_url, redirecting_url].each do |candidate|
    expect(Oneboxer.external_onebox(candidate)[:onebox]).to be_empty
  end
end
end end