Merge pull request #267 from tms/onebox-safety

Tighten up onebox rendering
This commit is contained in:
Robin Ward 2013-02-26 07:31:15 -08:00
commit 465772df72
10 changed files with 115 additions and 93 deletions

View File

@ -31,11 +31,14 @@ module Oneboxer
oneboxer = onebox_for_url(url) oneboxer = onebox_for_url(url)
return oneboxer.onebox if oneboxer.present? return oneboxer.onebox if oneboxer.present?
if Whitelist.allowed?(url) whitelist_entry = Whitelist.entry_for_url(url)
if whitelist_entry.present?
page_html = open(url).read page_html = open(url).read
if page_html.present? if page_html.present?
doc = Nokogiri::HTML(page_html) doc = Nokogiri::HTML(page_html)
if whitelist_entry.allows_oembed?
# See if it has an oembed thing we can use # See if it has an oembed thing we can use
(doc/"link[@type='application/json+oembed']").each do |oembed| (doc/"link[@type='application/json+oembed']").each do |oembed|
return OembedOnebox.new(oembed[:href]).onebox return OembedOnebox.new(oembed[:href]).onebox
@ -43,6 +46,7 @@ module Oneboxer
(doc/"link[@type='text/json+oembed']").each do |oembed| (doc/"link[@type='text/json+oembed']").each do |oembed|
return OembedOnebox.new(oembed[:href]).onebox return OembedOnebox.new(oembed[:href]).onebox
end end
end
# Check for opengraph # Check for opengraph
open_graph = Oneboxer.parse_open_graph(doc) open_graph = Oneboxer.parse_open_graph(doc)

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
module Oneboxer module Oneboxer
class ClikthroughOnebox < OembedOnebox class ClikthroughOnebox < OembedOnebox
matcher /clikthrough\.com\/theater\/video\/\d+$/ matcher /^https?:\/\/(?:www\.)?clikthrough\.com\/theater\/video\/\d+$/
def oembed_endpoint def oembed_endpoint
"http://clikthrough.com/services/oembed?url=#{BaseOnebox.uriencode(@url)}" "http://clikthrough.com/services/oembed?url=#{BaseOnebox.uriencode(@url)}"

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
module Oneboxer module Oneboxer
class DailymotionOnebox < OembedOnebox class DailymotionOnebox < OembedOnebox
matcher /dailymotion\.com\/.+$/ matcher /^https?:\/\/(?:www\.)?dailymotion\.com\/.+$/
def oembed_endpoint def oembed_endpoint
"http://www.dailymotion.com/api/oembed/?url=#{BaseOnebox.uriencode(@url)}" "http://www.dailymotion.com/api/oembed/?url=#{BaseOnebox.uriencode(@url)}"

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
module Oneboxer module Oneboxer
class DotsubOnebox < OembedOnebox class DotsubOnebox < OembedOnebox
matcher /dotsub\.com\/.+$/ matcher /^https?:\/\/(?:www\.)?dotsub\.com\/.+$/
def oembed_endpoint def oembed_endpoint
"http://dotsub.com/services/oembed?url=#{BaseOnebox.uriencode(@url)}" "http://dotsub.com/services/oembed?url=#{BaseOnebox.uriencode(@url)}"

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/handlebars_onebox'
module Oneboxer module Oneboxer
class GithubBlobOnebox < HandlebarsOnebox class GithubBlobOnebox < HandlebarsOnebox
matcher /github\.com\/[^\/]+\/[^\/]+\/blob\/.*/ matcher /^https?:\/\/(?:www\.)?github\.com\/[^\/]+\/[^\/]+\/blob\/.*/
favicon 'github.png' favicon 'github.png'
def translate_url def translate_url

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
module Oneboxer module Oneboxer
class KinomapOnebox < OembedOnebox class KinomapOnebox < OembedOnebox
matcher /kinomap\.com/ matcher /^https?:\/\/(?:www\.)?kinomap\.com/
def oembed_endpoint def oembed_endpoint
"http://www.kinomap.com/oembed?url=#{BaseOnebox.uriencode(@url)}&format=json" "http://www.kinomap.com/oembed?url=#{BaseOnebox.uriencode(@url)}&format=json"

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
module Oneboxer module Oneboxer
class NfbOnebox < OembedOnebox class NfbOnebox < OembedOnebox
matcher /nfb\.ca\/film\/[-\w]+\/?/ matcher /^https?:\/\/(?:www\.)?nfb\.ca\/film\/[-\w]+\/?/
def oembed_endpoint def oembed_endpoint
"http://www.nfb.ca/remote/services/oembed/?url=#{BaseOnebox.uriencode(@url)}&format=json" "http://www.nfb.ca/remote/services/oembed/?url=#{BaseOnebox.uriencode(@url)}&format=json"

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
module Oneboxer module Oneboxer
class ViddlerOnebox < OembedOnebox class ViddlerOnebox < OembedOnebox
matcher /viddler\.com\/.+$/ matcher /^https?:\/\/(?:www\.)?viddler\.com\/.+$/
def oembed_endpoint def oembed_endpoint
"http://lab.viddler.com/services/oembed/?url=#{BaseOnebox.uriencode(@url)}" "http://lab.viddler.com/services/oembed/?url=#{BaseOnebox.uriencode(@url)}"

View File

@ -2,84 +2,102 @@ module Oneboxer
module Whitelist module Whitelist
def self.entries def self.entries
[/cnn\.com\/.+/, [
/washingtonpost\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?cnn\.com\/.+/),
/\/\d{4}\/\d{2}\/\d{2}\//, # wordpress Entry.new(/^https?:\/\/(?:www\.)?washingtonpost\.com\/.+/),
/funnyordie\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?funnyordie\.com\/.+/),
/youtube\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?youtube\.com\/.+/),
/youtu\.be\/.+/, Entry.new(/^https?:\/\/(?:www\.)?youtu\.be\/.+/),
/500px\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?500px\.com\/.+/),
/scribd\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?scribd\.com\/.+/),
/photobucket\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?photobucket\.com\/.+/),
/ebay\.(com|ca|co\.uk)\/.+/, Entry.new(/^https?:\/\/(?:www\.)?ebay\.(com|ca|co\.uk)\/.+/),
/nytimes\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?nytimes\.com\/.+/),
/tumblr\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?pinterest\.com\/.+/),
/pinterest\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?imdb\.com\/.+/),
/imdb\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?bbc\.co\.uk\/.+/),
/bbc\.co\.uk\/.+/, Entry.new(/^https?:\/\/(?:www\.)?ask\.com\/.+/),
/ask\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?huffingtonpost\.com\/.+/),
/huffingtonpost\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?aol\.(com|ca)\/.+/),
/aol\.(com|ca)\/.+/, Entry.new(/^https?:\/\/(?:www\.)?espn\.go\.com\/.+/),
/espn\.go\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?about\.com\/.+/),
/about\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?cnet\.com\/.+/),
/cnet\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?ehow\.com\/.+/),
/ehow\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?dailymail\.co\.uk\/.+/),
/dailymail\.co\.uk\/.+/, Entry.new(/^https?:\/\/(?:www\.)?indiatimes\.com\/.+/),
/indiatimes\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?answers\.com\/.+/),
/answers\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?instagr\.am\/.+/),
/instagr\.am\/.+/, Entry.new(/^https?:\/\/(?:www\.)?battle\.net\/.+/),
/battle\.net\/.+/, Entry.new(/^https?:\/\/(?:www\.)?sourceforge\.net\/.+/),
/sourceforge\.net\/.+/, Entry.new(/^https?:\/\/(?:www\.)?myspace\.com\/.+/),
/myspace\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?wikia\.com\/.+/),
/wikia\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?etsy\.com\/.+/),
/etsy\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?walmart\.com\/.+/),
/walmart\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?reference\.com\/.+/),
/reference\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?yelp\.com\/.+/),
/yelp\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?foxnews\.com\/.+/),
/foxnews\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?guardian\.co\.uk\/.+/),
/guardian\.co\.uk\/.+/, Entry.new(/^https?:\/\/(?:www\.)?digg\.com\/.+/),
/digg\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?squidoo\.com\/.+/),
/squidoo\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?wsj\.com\/.+/),
/wsj\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?archive\.org\/.+/),
/archive\.org\/.+/, Entry.new(/^https?:\/\/(?:www\.)?nba\.com\/.+/),
/nba\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?samsung\.com\/.+/),
/samsung\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?mashable\.com\/.+/),
/mashable\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?forbes\.com\/.+/),
/forbes\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?soundcloud\.com\/.+/),
/soundcloud\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?thefreedictionary\.com\/.+/),
/thefreedictionary\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?groupon\.com\/.+/),
/groupon\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?ikea\.com\/.+/),
/ikea\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?dell\.com\/.+/),
/dell\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?mlb\.com\/.+/),
/mlb\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?bestbuy\.(com|ca)\/.+/),
/bestbuy\.(com|ca)\/.+/, Entry.new(/^https?:\/\/(?:www\.)?bloomberg\.com\/.+/),
/bloomberg\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?ign\.com\/.+/),
/ign\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?twitpic\.com\/.+/),
/twitpic\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?techcrunch\.com\/.+/),
/techcrunch\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?usatoday\.com\/.+/),
/usatoday\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?go\.com\/.+/),
/go\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?businessinsider\.com\/.+/),
/businessinsider\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?zillow\.com\/.+/),
/zillow\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?tmz\.com\/.+/),
/tmz\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?thesun\.co\.uk\/.+/),
/thesun\.co\.uk\/.+/, Entry.new(/^https?:\/\/(?:www\.)?thestar\.(com|ca)\/.+/),
/thestar\.(com|ca)\/.+/, Entry.new(/^https?:\/\/(?:www\.)?theglobeandmail\.com\/.+/),
/theglobeandmail\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?torontosun\.com\/.+/),
/torontosun\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?kickstarter\.com\/.+/),
/kickstarter\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?wired\.com\/.+/),
/wired\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?time\.com\/.+/),
/time\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?npr\.org\/.+/),
/npr\.org\/.+/, Entry.new(/^https?:\/\/(?:www\.)?cracked\.com\/.+/),
/cracked\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?deadline\.com\/.+/),
/thinkgeek\.com\/.+/, Entry.new(/^https?:\/\/(?:www\.)?thinkgeek\.com\/.+/),
/deadline\.com\/.+/ Entry.new(/^https?:\/\/(?:www\.)?tumblr\.com\/.+/, false),
Entry.new(/\/\d{4}\/\d{2}\/\d{2}\//, false), # wordpress
] ]
end end
def self.allowed?(url) def self.entry_for_url(url)
#return true entries.each {|e| return e if e.matches?(url) }
entries.each {|e| return true if url =~ e } nil
false end
private
class Entry
# oembed = false is probably safer, but this is the least-drastic change
def initialize(pattern, oembed = true)
@pattern = pattern
@oembed = oembed
end
def allows_oembed?
@oembed
end
def matches?(url)
url =~ @pattern
end
end end
end end

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox'
module Oneboxer module Oneboxer
class YfrogOnebox < OembedOnebox class YfrogOnebox < OembedOnebox
matcher /yfrog\.(com|ru|com\.tr|it|fr|co\.il|co\.uk|com\.pl|pl|eu|us)\/[a-zA-Z0-9]+/ matcher /^https?:\/\/(?:www\.)?yfrog\.(com|ru|com\.tr|it|fr|co\.il|co\.uk|com\.pl|pl|eu|us)\/[a-zA-Z0-9]+/
def oembed_endpoint def oembed_endpoint
"http://www.yfrog.com/api/oembed/?url=#{BaseOnebox.uriencode(@url)}&format=json" "http://www.yfrog.com/api/oembed/?url=#{BaseOnebox.uriencode(@url)}&format=json"