FEATURE: A site setting to prevent crawling on private IP blocks

This commit is contained in:
Robin Ward 2017-05-23 11:51:23 -04:00
parent c9028f517a
commit 93a5fc62bf
4 changed files with 70 additions and 43 deletions

View File

@ -1029,6 +1029,7 @@ en:
allow_moderators_to_create_categories: "Allow moderators to create new categories" allow_moderators_to_create_categories: "Allow moderators to create new categories"
cors_origins: "Allowed origins for cross-origin requests (CORS). Each origin must include http:// or https://. The DISCOURSE_ENABLE_CORS env variable must be set to true to enable CORS." cors_origins: "Allowed origins for cross-origin requests (CORS). Each origin must include http:// or https://. The DISCOURSE_ENABLE_CORS env variable must be set to true to enable CORS."
use_admin_ip_whitelist: "Admins can only log in if they are at an IP address defined in the Screened IPs list (Admin > Logs > Screened Ips)." use_admin_ip_whitelist: "Admins can only log in if they are at an IP address defined in the Screened IPs list (Admin > Logs > Screened Ips)."
blacklist_ip_blocks: "A list of private IP blocks that should never be crawled by Discourse"
top_menu: "Determine which items appear in the homepage navigation, and in what order. Example latest|new|unread|categories|top|read|posted|bookmarks" top_menu: "Determine which items appear in the homepage navigation, and in what order. Example latest|new|unread|categories|top|read|posted|bookmarks"
post_menu: "Determine which items appear on the post menu, and in what order. Example like|edit|flag|delete|share|bookmark|reply" post_menu: "Determine which items appear on the post menu, and in what order. Example like|edit|flag|delete|share|bookmark|reply"
post_menu_hidden_items: "The menu items to hide by default in the post menu unless an expansion ellipsis is clicked on." post_menu_hidden_items: "The menu items to hide by default in the post menu unless an expansion ellipsis is clicked on."

View File

@ -890,6 +890,10 @@ security:
use_admin_ip_whitelist: use_admin_ip_whitelist:
default: false default: false
client: true client: true
blacklist_ip_blocks:
default: ''
type: list
shadowed_by_global: true
onebox: onebox:
enable_flash_video_onebox: false enable_flash_video_onebox: false

View File

@ -7,7 +7,7 @@ class FinalDestination
attr_reader :status attr_reader :status
def initialize(url, opts = nil) def initialize(url, opts=nil)
@uri = URI(url) rescue nil @uri = URI(url) rescue nil
@opts = opts || {} @opts = opts || {}
@opts[:max_redirects] ||= 5 @opts[:max_redirects] ||= 5
@ -85,9 +85,8 @@ class FinalDestination
return false unless address_s return false unless address_s
address = IPAddr.new(address_s) address = IPAddr.new(address_s)
private_match = FinalDestination.private_ranges.any? {|r| r === address }
if private_match if private_ranges.any? {|r| r === address }
@status = :invalid_address @status = :invalid_address
return false return false
end end
@ -95,7 +94,12 @@ class FinalDestination
true true
end end
def self.private_ranges def private_ranges
FinalDestination.standard_private_ranges +
SiteSetting.blacklist_ip_blocks.split('|').map {|r| IPAddr.new(r) rescue nil }.compact
end
def self.standard_private_ranges
@private_ranges ||= [ @private_ranges ||= [
IPAddr.new('127.0.0.1'), IPAddr.new('127.0.0.1'),
IPAddr.new('172.16.0.0/12'), IPAddr.new('172.16.0.0/12'),

View File

@ -11,6 +11,7 @@ describe FinalDestination do
when 'codinghorror.com' then '91.146.108.148' when 'codinghorror.com' then '91.146.108.148'
when 'discourse.org' then '104.25.152.10' when 'discourse.org' then '104.25.152.10'
when 'private-host.com' then '192.168.10.1' when 'private-host.com' then '192.168.10.1'
when 'internal-ipv6.com' then '2001:abc:de:01:3:3d0:6a65:c2bf'
else else
host host
end end
@ -27,15 +28,19 @@ describe FinalDestination do
Excon.stub({ method: :head, hostname: from }, { status: 302, headers: { "Location" => dest } }) Excon.stub({ method: :head, hostname: from }, { status: 302, headers: { "Location" => dest } })
end end
def fd(url)
FinalDestination.new(url, opts)
end
describe '.resolve' do describe '.resolve' do
it "has a ready status code before anything happens" do it "has a ready status code before anything happens" do
expect(FinalDestination.new('https://eviltrout.com').status).to eq(:ready) expect(fd('https://eviltrout.com').status).to eq(:ready)
end end
it "returns nil an invalid url" do it "returns nil an invalid url" do
expect(FinalDestination.new(nil, opts).resolve).to be_nil expect(fd(nil).resolve).to be_nil
expect(FinalDestination.new('asdf', opts).resolve).to be_nil expect(fd('asdf').resolve).to be_nil
end end
context "without redirects" do context "without redirects" do
@ -44,10 +49,10 @@ describe FinalDestination do
end end
it "returns the final url" do it "returns the final url" do
fd = FinalDestination.new('https://eviltrout.com', opts) final = FinalDestination.new('https://eviltrout.com', opts)
expect(fd.resolve.to_s).to eq('https://eviltrout.com') expect(final.resolve.to_s).to eq('https://eviltrout.com')
expect(fd.redirected?).to eq(false) expect(final.redirected?).to eq(false)
expect(fd.status).to eq(:resolved) expect(final.status).to eq(:resolved)
end end
end end
@ -59,10 +64,10 @@ describe FinalDestination do
end end
it "returns the final url" do it "returns the final url" do
fd = FinalDestination.new('https://eviltrout.com', opts) final = FinalDestination.new('https://eviltrout.com', opts)
expect(fd.resolve.to_s).to eq('https://discourse.org') expect(final.resolve.to_s).to eq('https://discourse.org')
expect(fd.redirected?).to eq(true) expect(final.redirected?).to eq(true)
expect(fd.status).to eq(:resolved) expect(final.status).to eq(:resolved)
end end
end end
@ -74,10 +79,10 @@ describe FinalDestination do
end end
it "returns the final url" do it "returns the final url" do
fd = FinalDestination.new('https://eviltrout.com', opts.merge(max_redirects: 1)) final = FinalDestination.new('https://eviltrout.com', opts.merge(max_redirects: 1))
expect(fd.resolve).to be_nil expect(final.resolve).to be_nil
expect(fd.redirected?).to eq(true) expect(final.redirected?).to eq(true)
expect(fd.status).to eq(:too_many_redirects) expect(final.status).to eq(:too_many_redirects)
end end
end end
@ -88,10 +93,10 @@ describe FinalDestination do
end end
it "returns the final url" do it "returns the final url" do
fd = FinalDestination.new('https://eviltrout.com', opts) final = FinalDestination.new('https://eviltrout.com', opts)
expect(fd.resolve).to be_nil expect(final.resolve).to be_nil
expect(fd.redirected?).to eq(true) expect(final.redirected?).to eq(true)
expect(fd.status).to eq(:invalid_address) expect(final.status).to eq(:invalid_address)
end end
end end
end end
@ -99,63 +104,76 @@ describe FinalDestination do
describe '.validate_uri' do describe '.validate_uri' do
context "host lookups" do context "host lookups" do
it "works for various hosts" do it "works for various hosts" do
expect(FinalDestination.new('https://private-host.com', opts).validate_uri).to eq(false) expect(fd('https://private-host.com').validate_uri).to eq(false)
expect(FinalDestination.new('https://eviltrout.com:443', opts).validate_uri).to eq(true) expect(fd('https://eviltrout.com:443').validate_uri).to eq(true)
end end
end end
end end
describe ".validate_url_format" do describe ".validate_url_format" do
it "supports http urls" do it "supports http urls" do
expect(FinalDestination.new('http://eviltrout.com', opts).validate_uri_format).to eq(true) expect(fd('http://eviltrout.com').validate_uri_format).to eq(true)
end end
it "supports https urls" do it "supports https urls" do
expect(FinalDestination.new('https://eviltrout.com', opts).validate_uri_format).to eq(true) expect(fd('https://eviltrout.com').validate_uri_format).to eq(true)
end end
it "doesn't support ftp urls" do it "doesn't support ftp urls" do
expect(FinalDestination.new('ftp://eviltrout.com', opts).validate_uri_format).to eq(false) expect(fd('ftp://eviltrout.com').validate_uri_format).to eq(false)
end end
it "returns false for schemeless URL" do it "returns false for schemeless URL" do
expect(FinalDestination.new('eviltrout.com', opts).validate_uri_format).to eq(false) expect(fd('eviltrout.com').validate_uri_format).to eq(false)
end end
it "returns false for nil URL" do it "returns false for nil URL" do
expect(FinalDestination.new(nil, opts).validate_uri_format).to eq(false) expect(fd(nil).validate_uri_format).to eq(false)
end end
it "returns false for invalid ports" do it "returns false for invalid ports" do
expect(FinalDestination.new('http://eviltrout.com:21', opts).validate_uri_format).to eq(false) expect(fd('http://eviltrout.com:21').validate_uri_format).to eq(false)
expect(FinalDestination.new('https://eviltrout.com:8000', opts).validate_uri_format).to eq(false) expect(fd('https://eviltrout.com:8000').validate_uri_format).to eq(false)
end end
it "returns true for valid ports" do it "returns true for valid ports" do
expect(FinalDestination.new('http://eviltrout.com:80', opts).validate_uri_format).to eq(true) expect(fd('http://eviltrout.com:80').validate_uri_format).to eq(true)
expect(FinalDestination.new('https://eviltrout.com:443',opts).validate_uri_format).to eq(true) expect(fd('https://eviltrout.com:443').validate_uri_format).to eq(true)
end end
end end
describe ".is_public" do describe ".is_public" do
it "returns false for a valid ipv4" do it "returns false for a valid ipv4" do
expect(FinalDestination.new("https://52.84.143.67", opts).is_public?).to eq(true) expect(fd("https://52.84.143.67").is_public?).to eq(true)
expect(FinalDestination.new("https://104.25.153.10", opts).is_public?).to eq(true) expect(fd("https://104.25.153.10").is_public?).to eq(true)
end end
it "returns true for private ipv4" do it "returns false for private ipv4" do
expect(FinalDestination.new("https://127.0.0.1", opts).is_public?).to eq(false) expect(fd("https://127.0.0.1").is_public?).to eq(false)
expect(FinalDestination.new("https://192.168.1.3", opts).is_public?).to eq(false) expect(fd("https://192.168.1.3").is_public?).to eq(false)
expect(FinalDestination.new("https://10.0.0.5", opts).is_public?).to eq(false) expect(fd("https://10.0.0.5").is_public?).to eq(false)
expect(FinalDestination.new("https://172.16.0.1", opts).is_public?).to eq(false) expect(fd("https://172.16.0.1").is_public?).to eq(false)
end
it "returns false for IPV6 via site settings" do
SiteSetting.blacklist_ip_blocks = '2001:abc:de::/48|2002:abc:de::/48'
expect(fd('https://[2001:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false)
expect(fd('https://[2002:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false)
expect(fd('https://internal-ipv6.com').is_public?).to eq(false)
expect(fd('https://[2003:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(true)
end
it "ignores invalid ranges" do
SiteSetting.blacklist_ip_blocks = '2001:abc:de::/48|eviltrout'
expect(fd('https://[2001:abc:de:01:0:3f0:6a65:c2bf]').is_public?).to eq(false)
end end
it "returns true for public ipv6" do it "returns true for public ipv6" do
expect(FinalDestination.new("https://[2001:470:1:3a8::251]", opts).is_public?).to eq(true) expect(fd("https://[2001:470:1:3a8::251]").is_public?).to eq(true)
end end
it "returns true for private ipv6" do it "returns true for private ipv6" do
expect(FinalDestination.new("https://[fdd7:b450:d4d1:6b44::1]", opts).is_public?).to eq(false) expect(fd("https://[fdd7:b450:d4d1:6b44::1]").is_public?).to eq(false)
end end
end end