From ba238f92c2e5d3572a61ac539be55c0cbef29a26 Mon Sep 17 00:00:00 2001 From: Robin Ward Date: Tue, 19 Feb 2013 14:22:13 -0500 Subject: [PATCH] Revert "Merge branch 'onebox-safety' of git://github.com/tms/discourse" This reverts commit 7ca57db97afda4d1c130269061f0db3cb9d0dd42, reversing changes made to b7e027cfd1590365ac9c85141ff2b02c25e900fb. --- lib/oneboxer.rb | 14 +- lib/oneboxer/clikthrough_onebox.rb | 2 +- lib/oneboxer/dailymotion_onebox.rb | 2 +- lib/oneboxer/dotsub_onebox.rb | 2 +- lib/oneboxer/github_blob_onebox.rb | 2 +- lib/oneboxer/kinomap_onebox.rb | 2 +- lib/oneboxer/nfb_onebox.rb | 2 +- lib/oneboxer/open_graph_onebox.rb | 1 - lib/oneboxer/templates/simple_onebox.hbrs | 7 +- lib/oneboxer/viddler_onebox.rb | 2 +- lib/oneboxer/whitelist.rb | 175 +++++++++------------- lib/oneboxer/yfrog_onebox.rb | 2 +- 12 files changed, 88 insertions(+), 125 deletions(-) diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb index 107b6c01f38..01b7da6adc2 100644 --- a/lib/oneboxer.rb +++ b/lib/oneboxer.rb @@ -36,14 +36,12 @@ module Oneboxer if page_html.present? doc = Nokogiri::HTML(page_html) - if Whitelist.oembed_allowed?(url) - # See if if it has an oembed thing we can use - (doc/"link[@type='application/json+oembed']").each do |oembed| - return OembedOnebox.new(oembed[:href]).onebox - end - (doc/"link[@type='text/json+oembed']").each do |oembed| - return OembedOnebox.new(oembed[:href]).onebox - end + # See if if it has an oembed thing we can use + (doc/"link[@type='application/json+oembed']").each do |oembed| + return OembedOnebox.new(oembed[:href]).onebox + end + (doc/"link[@type='text/json+oembed']").each do |oembed| + return OembedOnebox.new(oembed[:href]).onebox end # Check for opengraph diff --git a/lib/oneboxer/clikthrough_onebox.rb b/lib/oneboxer/clikthrough_onebox.rb index 7869ea895e4..8561c655591 100644 --- a/lib/oneboxer/clikthrough_onebox.rb +++ b/lib/oneboxer/clikthrough_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox' module Oneboxer class ClikthroughOnebox < OembedOnebox - matcher /^https?:\/\/(?:www\.)?clikthrough\.com\/theater\/video\/\d+$/ + matcher /clikthrough\.com\/theater\/video\/\d+$/ def oembed_endpoint "http://clikthrough.com/services/oembed?url=#{BaseOnebox.uriencode(@url)}" diff --git a/lib/oneboxer/dailymotion_onebox.rb b/lib/oneboxer/dailymotion_onebox.rb index fccabbb02bc..83ae8647ca9 100644 --- a/lib/oneboxer/dailymotion_onebox.rb +++ b/lib/oneboxer/dailymotion_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox' module Oneboxer class DailymotionOnebox < OembedOnebox - matcher /^https?:\/\/(?:www\.)?dailymotion\.com\/.+$/ + matcher /dailymotion\.com\/.+$/ def oembed_endpoint "http://www.dailymotion.com/api/oembed/?url=#{BaseOnebox.uriencode(@url)}" diff --git a/lib/oneboxer/dotsub_onebox.rb b/lib/oneboxer/dotsub_onebox.rb index 96bbabe5b84..2d524e9372c 100644 --- a/lib/oneboxer/dotsub_onebox.rb +++ b/lib/oneboxer/dotsub_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox' module Oneboxer class DotsubOnebox < OembedOnebox - matcher /^https?:\/\/(?:www\.)?dotsub\.com\/.+$/ + matcher /dotsub\.com\/.+$/ def oembed_endpoint "http://dotsub.com/services/oembed?url=#{BaseOnebox.uriencode(@url)}" diff --git a/lib/oneboxer/github_blob_onebox.rb b/lib/oneboxer/github_blob_onebox.rb index 859ccf15cf4..59fb282dcaf 100644 --- a/lib/oneboxer/github_blob_onebox.rb +++ b/lib/oneboxer/github_blob_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/handlebars_onebox' module Oneboxer class GithubBlobOnebox < HandlebarsOnebox - matcher /^https?:\/\/(?:www\.)?github\.com\/[^\/]+\/[^\/]+\/blob\/.*/ + matcher /github\.com\/[^\/]+\/[^\/]+\/blob\/.*/ favicon 'github.png' def translate_url diff --git a/lib/oneboxer/kinomap_onebox.rb b/lib/oneboxer/kinomap_onebox.rb index cb427ee4fe8..1e1af58dea2 100644 --- a/lib/oneboxer/kinomap_onebox.rb +++ b/lib/oneboxer/kinomap_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox' module Oneboxer class KinomapOnebox < OembedOnebox - matcher /^https?:\/\/(?:www\.)?kinomap\.com/ + matcher /kinomap\.com/ def oembed_endpoint "http://www.kinomap.com/oembed?url=#{BaseOnebox.uriencode(@url)}&format=json" diff --git a/lib/oneboxer/nfb_onebox.rb b/lib/oneboxer/nfb_onebox.rb index f749c097fc5..f8677f0d42f 100644 --- a/lib/oneboxer/nfb_onebox.rb +++ b/lib/oneboxer/nfb_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox' module Oneboxer class NfbOnebox < OembedOnebox - matcher /^https?:\/\/(?:www\.)?nfb\.ca\/film\/[-\w]+\/?/ + matcher /nfb\.ca\/film\/[-\w]+\/?/ def oembed_endpoint "http://www.nfb.ca/remote/services/oembed/?url=#{BaseOnebox.uriencode(@url)}&format=json" diff --git a/lib/oneboxer/open_graph_onebox.rb b/lib/oneboxer/open_graph_onebox.rb index 53e4a4349ff..427310ccdaf 100644 --- a/lib/oneboxer/open_graph_onebox.rb +++ b/lib/oneboxer/open_graph_onebox.rb @@ -20,7 +20,6 @@ module Oneboxer @opts[:original_url] = @url @opts[:text] = @opts['description'] - @opts[:unsafe] = true begin parsed = URI.parse(@url) diff --git a/lib/oneboxer/templates/simple_onebox.hbrs b/lib/oneboxer/templates/simple_onebox.hbrs index 5eda1f98ff0..340a89fb526 100644 --- a/lib/oneboxer/templates/simple_onebox.hbrs +++ b/lib/oneboxer/templates/simple_onebox.hbrs @@ -12,12 +12,7 @@ {{#image}}{{/image}}

{{title}}

{{#by_info}}

{{by_info}}

{{/by_info}} - {{#unsafe}} - {{text}} - {{/unsafe}} - {{^unsafe}} - {{{text}}} - {{/unsafe}} + {{{text}}}
diff --git a/lib/oneboxer/viddler_onebox.rb b/lib/oneboxer/viddler_onebox.rb index 31a150b5ff6..987e05cd143 100644 --- a/lib/oneboxer/viddler_onebox.rb +++ b/lib/oneboxer/viddler_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox' module Oneboxer class ViddlerOnebox < OembedOnebox - matcher /^https?:\/\/(?:www\.)?viddler\.com\/.+$/ + matcher /viddler\.com\/.+$/ def oembed_endpoint "http://lab.viddler.com/services/oembed/?url=#{BaseOnebox.uriencode(@url)}" diff --git a/lib/oneboxer/whitelist.rb b/lib/oneboxer/whitelist.rb index caad33bafa2..38f27760622 100644 --- a/lib/oneboxer/whitelist.rb +++ b/lib/oneboxer/whitelist.rb @@ -2,113 +2,84 @@ module Oneboxer module Whitelist def self.entries - [ - Entry.new(/^https?:\/\/(?:www\.)?cnn\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?washingtonpost\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?funnyordie\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?youtube\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?youtu\.be\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?500px\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?scribd\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?photobucket\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?ebay\.(com|ca|co\.uk)\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?nytimes\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?pinterest\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?imdb\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?bbc\.co\.uk\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?ask\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?huffingtonpost\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?aol\.(com|ca)\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?espn\.go\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?about\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?cnet\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?ehow\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?dailymail\.co\.uk\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?indiatimes\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?answers\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?instagr\.am\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?battle\.net\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?sourceforge\.net\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?myspace\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?wikia\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?etsy\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?walmart\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?reference\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?yelp\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?foxnews\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?guardian\.co\.uk\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?digg\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?squidoo\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?wsj\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?archive\.org\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?nba\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?samsung\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?mashable\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?forbes\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?soundcloud\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?thefreedictionary\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?groupon\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?ikea\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?dell\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?mlb\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?bestbuy\.(com|ca)\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?bloomberg\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?ign\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?twitpic\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?techcrunch\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?usatoday\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?go\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?businessinsider\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?zillow\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?tmz\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?thesun\.co\.uk\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?thestar\.(com|ca)\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?theglobeandmail\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?torontosun\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?kickstarter\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?wired\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?time\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?npr\.org\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?cracked\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?deadline\.com\/.+/), - Entry.new(/^https?:\/\/(?:www\.)?tumblr\.com\/.+/, false), - Entry.new(/\/\d{4}\/\d{2}\/\d{2}\//, false), # wordpress - ] + [/cnn\.com\/.+/, + /washingtonpost\.com\/.+/, + /\/\d{4}\/\d{2}\/\d{2}\//, # wordpress + /funnyordie\.com\/.+/, + /youtube\.com\/.+/, + /youtu\.be\/.+/, + /500px\.com\/.+/, + /scribd\.com\/.+/, + /photobucket\.com\/.+/, + /ebay\.(com|ca|co\.uk)\/.+/, + /nytimes\.com\/.+/, + /tumblr\.com\/.+/, + /pinterest\.com\/.+/, + /imdb\.com\/.+/, + /bbc\.co\.uk\/.+/, + /ask\.com\/.+/, + /huffingtonpost\.com\/.+/, + /aol\.(com|ca)\/.+/, + /espn\.go\.com\/.+/, + /about\.com\/.+/, + /cnet\.com\/.+/, + /ehow\.com\/.+/, + /dailymail\.co\.uk\/.+/, + /indiatimes\.com\/.+/, + /answers\.com\/.+/, + /instagr\.am\/.+/, + /battle\.net\/.+/, + /sourceforge\.net\/.+/, + /myspace\.com\/.+/, + /wikia\.com\/.+/, + /etsy\.com\/.+/, + /walmart\.com\/.+/, + /reference\.com\/.+/, + /yelp\.com\/.+/, + /foxnews\.com\/.+/, + /guardian\.co\.uk\/.+/, + /digg\.com\/.+/, + /squidoo\.com\/.+/, + /wsj\.com\/.+/, + /archive\.org\/.+/, + /nba\.com\/.+/, + /samsung\.com\/.+/, + /mashable\.com\/.+/, + /forbes\.com\/.+/, + /soundcloud\.com\/.+/, + /thefreedictionary\.com\/.+/, + /groupon\.com\/.+/, + /ikea\.com\/.+/, + /dell\.com\/.+/, + /mlb\.com\/.+/, + /bestbuy\.(com|ca)\/.+/, + /bloomberg\.com\/.+/, + /ign\.com\/.+/, + /twitpic\.com\/.+/, + /techcrunch\.com\/.+/, + /usatoday\.com\/.+/, + /go\.com\/.+/, + /businessinsider\.com\/.+/, + /zillow\.com\/.+/, + /tmz\.com\/.+/, + /thesun\.co\.uk\/.+/, + /thestar\.(com|ca)\/.+/, + /theglobeandmail\.com\/.+/, + /torontosun\.com\/.+/, + /kickstarter\.com\/.+/, + /wired\.com\/.+/, + /time\.com\/.+/, + /npr\.org\/.+/, + /cracked\.com\/.+/, + /deadline\.com\/.+/ + ] end def self.allowed?(url) - !match(url).nil? - end - - def self.oembed_allowed?(url) - unless (e = match(url)).nil? - return e.allows_oembed? - end + #return true + entries.each {|e| return true if url =~ e } false end - - private - - def self.match(url) - entries.each {|e| return e if e.matches?(url) } - nil - end - - class Entry - # oembed = false is probably safer, but this is the least-drastic change - def initialize(pattern, oembed = true) - @pattern = pattern - @oembed = oembed - end - - def allows_oembed? - @oembed - end - - def matches?(url) - url =~ @pattern - end - end end diff --git a/lib/oneboxer/yfrog_onebox.rb b/lib/oneboxer/yfrog_onebox.rb index 96c72918cfa..6090930d9cc 100644 --- a/lib/oneboxer/yfrog_onebox.rb +++ b/lib/oneboxer/yfrog_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/oembed_onebox' module Oneboxer class YfrogOnebox < OembedOnebox - matcher /^https?:\/\/(?:www\.)?yfrog\.(com|ru|com\.tr|it|fr|co\.il|co\.uk|com\.pl|pl|eu|us)\/[a-zA-Z0-9]+/ + matcher /yfrog\.(com|ru|com\.tr|it|fr|co\.il|co\.uk|com\.pl|pl|eu|us)\/[a-zA-Z0-9]+/ def oembed_endpoint "http://www.yfrog.com/api/oembed/?url=#{BaseOnebox.uriencode(@url)}&format=json"