FIX: Ignore OneBox blacklisted domains.
This commit is contained in:
parent
b12cf08c57
commit
b6963b8ffb
|
@ -26,7 +26,7 @@ class FinalDestination
|
||||||
"HTTPS_DOMAIN_#{domain}"
|
"HTTPS_DOMAIN_#{domain}"
|
||||||
end
|
end
|
||||||
|
|
||||||
attr_reader :status, :cookie, :status_code
|
attr_reader :status, :cookie, :status_code, :ignored
|
||||||
|
|
||||||
def initialize(url, opts = nil)
|
def initialize(url, opts = nil)
|
||||||
@url = url
|
@url = url
|
||||||
|
@ -36,7 +36,15 @@ class FinalDestination
|
||||||
@force_get_hosts = @opts[:force_get_hosts] || []
|
@force_get_hosts = @opts[:force_get_hosts] || []
|
||||||
@opts[:max_redirects] ||= 5
|
@opts[:max_redirects] ||= 5
|
||||||
@opts[:lookup_ip] ||= lambda { |host| FinalDestination.lookup_ip(host) }
|
@opts[:lookup_ip] ||= lambda { |host| FinalDestination.lookup_ip(host) }
|
||||||
@ignored = [Discourse.base_url_no_prefix] + (@opts[:ignore_redirects] || [])
|
|
||||||
|
@ignored = @opts[:ignore_hostnames] || []
|
||||||
|
[Discourse.base_url_no_prefix].concat(@opts[:ignore_redirects] || []).each do |url|
|
||||||
|
url = uri(url)
|
||||||
|
if url.present? && url.hostname
|
||||||
|
@ignored << url.hostname
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
@limit = @opts[:max_redirects]
|
@limit = @opts[:max_redirects]
|
||||||
@status = :ready
|
@status = :ready
|
||||||
@http_verb = @force_get_hosts.any? { |host| hostname_matches?(host) } ? :get : :head
|
@http_verb = @force_get_hosts.any? { |host| hostname_matches?(host) } ? :get : :head
|
||||||
|
@ -131,18 +139,18 @@ class FinalDestination
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
|
|
||||||
@ignored.each do |host|
|
|
||||||
if hostname_matches?(host)
|
|
||||||
@status = :resolved
|
|
||||||
return @uri
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
unless validate_uri
|
unless validate_uri
|
||||||
log(:warn, "FinalDestination could not resolve URL (invalid URI): #{@uri}") if @verbose
|
log(:warn, "FinalDestination could not resolve URL (invalid URI): #{@uri}") if @verbose
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ignored.each do |host|
|
||||||
|
if @uri&.hostname&.match?(host)
|
||||||
|
@status = :resolved
|
||||||
|
return @uri
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
headers = request_headers
|
headers = request_headers
|
||||||
response = Excon.public_send(@http_verb,
|
response = Excon.public_send(@http_verb,
|
||||||
@uri.to_s,
|
@uri.to_s,
|
||||||
|
|
|
@ -250,9 +250,11 @@ module Oneboxer
|
||||||
|
|
||||||
def self.external_onebox(url)
|
def self.external_onebox(url)
|
||||||
Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
|
Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
|
||||||
fd = FinalDestination.new(url, ignore_redirects: ignore_redirects, force_get_hosts: force_get_hosts)
|
ignored = SiteSetting.onebox_domains_blacklist.split("|")
|
||||||
|
|
||||||
|
fd = FinalDestination.new(url, ignore_redirects: ignore_redirects, ignore_hostnames: ignored, force_get_hosts: force_get_hosts)
|
||||||
uri = fd.resolve
|
uri = fd.resolve
|
||||||
return blank_onebox if uri.blank? || SiteSetting.onebox_domains_blacklist.include?(uri.hostname)
|
return blank_onebox if uri.blank? || ignored.map { |hostname| uri.hostname.match?(hostname) }.any?
|
||||||
|
|
||||||
options = {
|
options = {
|
||||||
cache: {},
|
cache: {},
|
||||||
|
|
|
@ -47,6 +47,14 @@ describe FinalDestination do
|
||||||
FinalDestination.new(url, opts)
|
FinalDestination.new(url, opts)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'correctly parses ignored hostnames' do
|
||||||
|
fd = FinalDestination.new('https://meta.discourse.org',
|
||||||
|
ignore_redirects: ['http://google.com', 'youtube.com', 'https://meta.discourse.org', '://bing.com']
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(fd.ignored).to eq(['test.localhost', 'google.com', 'meta.discourse.org'])
|
||||||
|
end
|
||||||
|
|
||||||
describe '.resolve' do
|
describe '.resolve' do
|
||||||
|
|
||||||
it "has a ready status code before anything happens" do
|
it "has a ready status code before anything happens" do
|
||||||
|
|
|
@ -107,4 +107,13 @@ describe Oneboxer do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "does not crawl blacklisted URLs" do
|
||||||
|
SiteSetting.onebox_domains_blacklist = "git.*.com|bitbucket.com"
|
||||||
|
url = 'https://github.com/discourse/discourse/commit/21b562852885f883be43032e03c709241e8e6d4f'
|
||||||
|
stub_request(:head, 'https://discourse.org/').to_return(status: 302, body: "", headers: { location: url })
|
||||||
|
|
||||||
|
expect(Oneboxer.external_onebox(url)[:onebox]).to be_empty
|
||||||
|
expect(Oneboxer.external_onebox('https://discourse.org/')[:onebox]).to be_empty
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue