FIX: Ignore OneBox blacklisted domains.
This commit is contained in:
parent
b12cf08c57
commit
b6963b8ffb
|
@ -26,7 +26,7 @@ class FinalDestination
|
|||
"HTTPS_DOMAIN_#{domain}"
|
||||
end
|
||||
|
||||
attr_reader :status, :cookie, :status_code
|
||||
attr_reader :status, :cookie, :status_code, :ignored
|
||||
|
||||
def initialize(url, opts = nil)
|
||||
@url = url
|
||||
|
@ -36,7 +36,15 @@ class FinalDestination
|
|||
@force_get_hosts = @opts[:force_get_hosts] || []
|
||||
@opts[:max_redirects] ||= 5
|
||||
@opts[:lookup_ip] ||= lambda { |host| FinalDestination.lookup_ip(host) }
|
||||
@ignored = [Discourse.base_url_no_prefix] + (@opts[:ignore_redirects] || [])
|
||||
|
||||
@ignored = @opts[:ignore_hostnames] || []
|
||||
[Discourse.base_url_no_prefix].concat(@opts[:ignore_redirects] || []).each do |url|
|
||||
url = uri(url)
|
||||
if url.present? && url.hostname
|
||||
@ignored << url.hostname
|
||||
end
|
||||
end
|
||||
|
||||
@limit = @opts[:max_redirects]
|
||||
@status = :ready
|
||||
@http_verb = @force_get_hosts.any? { |host| hostname_matches?(host) } ? :get : :head
|
||||
|
@ -131,18 +139,18 @@ class FinalDestination
|
|||
return nil
|
||||
end
|
||||
|
||||
@ignored.each do |host|
|
||||
if hostname_matches?(host)
|
||||
@status = :resolved
|
||||
return @uri
|
||||
end
|
||||
end
|
||||
|
||||
unless validate_uri
|
||||
log(:warn, "FinalDestination could not resolve URL (invalid URI): #{@uri}") if @verbose
|
||||
return nil
|
||||
end
|
||||
|
||||
@ignored.each do |host|
|
||||
if @uri&.hostname&.match?(host)
|
||||
@status = :resolved
|
||||
return @uri
|
||||
end
|
||||
end
|
||||
|
||||
headers = request_headers
|
||||
response = Excon.public_send(@http_verb,
|
||||
@uri.to_s,
|
||||
|
|
|
@ -250,9 +250,11 @@ module Oneboxer
|
|||
|
||||
def self.external_onebox(url)
|
||||
Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
|
||||
fd = FinalDestination.new(url, ignore_redirects: ignore_redirects, force_get_hosts: force_get_hosts)
|
||||
ignored = SiteSetting.onebox_domains_blacklist.split("|")
|
||||
|
||||
fd = FinalDestination.new(url, ignore_redirects: ignore_redirects, ignore_hostnames: ignored, force_get_hosts: force_get_hosts)
|
||||
uri = fd.resolve
|
||||
return blank_onebox if uri.blank? || SiteSetting.onebox_domains_blacklist.include?(uri.hostname)
|
||||
return blank_onebox if uri.blank? || ignored.map { |hostname| uri.hostname.match?(hostname) }.any?
|
||||
|
||||
options = {
|
||||
cache: {},
|
||||
|
|
|
@ -47,6 +47,14 @@ describe FinalDestination do
|
|||
FinalDestination.new(url, opts)
|
||||
end
|
||||
|
||||
it 'correctly parses ignored hostnames' do
|
||||
fd = FinalDestination.new('https://meta.discourse.org',
|
||||
ignore_redirects: ['http://google.com', 'youtube.com', 'https://meta.discourse.org', '://bing.com']
|
||||
)
|
||||
|
||||
expect(fd.ignored).to eq(['test.localhost', 'google.com', 'meta.discourse.org'])
|
||||
end
|
||||
|
||||
describe '.resolve' do
|
||||
|
||||
it "has a ready status code before anything happens" do
|
||||
|
|
|
@ -107,4 +107,13 @@ describe Oneboxer do
|
|||
end
|
||||
end
|
||||
|
||||
it "does not crawl blacklisted URLs" do
|
||||
SiteSetting.onebox_domains_blacklist = "git.*.com|bitbucket.com"
|
||||
url = 'https://github.com/discourse/discourse/commit/21b562852885f883be43032e03c709241e8e6d4f'
|
||||
stub_request(:head, 'https://discourse.org/').to_return(status: 302, body: "", headers: { location: url })
|
||||
|
||||
expect(Oneboxer.external_onebox(url)[:onebox]).to be_empty
|
||||
expect(Oneboxer.external_onebox('https://discourse.org/')[:onebox]).to be_empty
|
||||
end
|
||||
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue