2014-02-25 13:35:08 -05:00
|
|
|
|
|
|
|
# Load every custom onebox engine found under lib/onebox/engine. Each file is
# handed to require_dependency by its last three path segments, e.g.
# "onebox/engine/<name>_onebox.rb".
Dir["#{Rails.root}/lib/onebox/engine/*_onebox.rb"].each do |engine_path|
  relative_path = engine_path.split('/').last(3).join('/')
  require_dependency(relative_path)
end
|
|
|
|
|
2013-02-05 14:16:51 -05:00
|
|
|
# Helpers for turning `a.onebox` links in cooked HTML into rich previews and
# for caching the rendered result per URL in Rails.cache.
module Oneboxer
  # keep reloaders happy
  unless defined? Oneboxer::Result
    # Outcome of Oneboxer.apply: the (possibly modified) document plus a flag
    # telling whether any link was replaced.
    Result = Struct.new(:doc, :changed) do
      # Serializes the wrapped document by delegating to doc.to_html.
      def to_html
        doc.to_html
      end

      # Predicate form of the `changed` member.
      def changed?
        changed
      end
    end
  end

  # Returns the :preview entry produced by onebox_raw for +url+.
  # When options[:invalidate_oneboxes] is truthy the cached entry for the URL
  # is deleted first, forcing a fresh lookup.
  def self.preview(url, options=nil)
    options ||= {}
    Oneboxer.invalidate(url) if options[:invalidate_oneboxes]
    onebox_raw(url)[:preview]
  end

  # Returns the :onebox entry produced by onebox_raw for +url+.
  # When options[:invalidate_oneboxes] is truthy the cached entry for the URL
  # is deleted first, forcing a fresh lookup.
  def self.onebox(url, options=nil)
    options ||= {}
    Oneboxer.invalidate(url) if options[:invalidate_oneboxes]
    onebox_raw(url)[:onebox]
  end

  # Reads the cached :onebox value for +url+ without computing anything.
  # Returns nil when nothing is cached, and "" when reading the entry raised.
  def self.cached_onebox(url)
    cached_value(url, :onebox, "onebox")
  end

  # Reads the cached :preview value for +url+ without computing anything.
  # Returns nil when nothing is cached, and "" when reading the entry raised.
  def self.cached_preview(url)
    cached_value(url, :preview, "preview")
  end

  # Shared implementation behind cached_onebox / cached_preview.
  #
  # url   - the URL whose cache entry is read
  # field - key to pull out of the cached hash (:onebox or :preview)
  # label - word used in the warning message ("onebox" or "preview")
  #
  # Returns cached[field], nil on a cache miss, or "" if the read raised.
  def self.cached_value(url, field, label)
    cached = Rails.cache.read(onebox_cache_key(url))
    cached[field] if cached
  rescue => e
    # Log before invalidating so the warning is not lost if the delete fails too.
    Rails.logger.warn("invalid cached #{label} for #{url} #{e}")
    begin
      invalidate(url)
    rescue => invalidate_error
      # A failing delete must not escape: this path always answers with "".
      Rails.logger.warn("failed to invalidate cached #{label} for #{url} #{invalidate_error}")
    end
    ""
  end
  private_class_method :cached_value

  # Delegates to Onebox.has_matcher? for +url+.
  def self.oneboxer_exists_for_url?(url)
    Onebox.has_matcher?(url)
  end

  # Deletes the cache entry stored for +url+.
  def self.invalidate(url)
    Rails.cache.delete(onebox_cache_key(url))
  end

  # Parse URLs out of HTML, returning the document when finished.
  #
  # Accepts either an HTML string (parsed as a fragment) or an already parsed
  # document. Yields (href, link_node) for every `a.onebox` element whose href
  # is present.
  def self.each_onebox_link(string_or_doc)
    doc = string_or_doc
    doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)

    # Iterating an empty result is a no-op, so no emptiness guard is needed.
    doc.search("a.onebox").each do |link|
      href = link['href']
      yield href, link if href.present?
    end

    doc
  end

  # Replaces onebox links in +string_or_doc+ with the HTML returned by the
  # caller's block.
  #
  # The block receives (url, element); the first element of what it returns is
  # used as the onebox HTML, and a falsy value or HTML that parses to no nodes
  # leaves the link untouched.
  #
  # Returns a Result wrapping the document and whether anything was swapped.
  def self.apply(string_or_doc)
    doc = string_or_doc
    doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
    changed = false

    Oneboxer.each_onebox_link(doc) do |url, element|
      # Only the first returned value is used here.
      onebox, _preview = yield(url, element)
      next unless onebox

      parsed_onebox = Nokogiri::HTML::fragment(onebox)
      next unless parsed_onebox.children.count > 0

      # special logic to strip empty p elements: when the link is the only
      # child of a <p>, replace the whole paragraph instead of just the link
      if element.parent &&
         element.parent.node_name.downcase == "p" &&
         element.parent.children.count == 1
        element = element.parent
      end

      changed = true
      element.swap parsed_onebox.to_html
    end

    Result.new(doc, changed)
  end

  # NOTE: a bare `private` does not apply to `def self.` methods, so the
  # methods below remain callable from outside; it is kept only as a marker
  # of intent and visibility is deliberately left unchanged.
  private

  # Cache key under which the rendered onebox for +url+ is stored.
  def self.onebox_cache_key(url)
    "onebox__#{url}"
  end

  # Appends the domains from SiteSetting.onebox_domains_whitelist (a
  # '|'-separated string) to WhitelistedGenericOnebox's whitelist, in place,
  # dropping duplicates.
  def self.add_discourse_whitelists
    # Add custom domain whitelists
    if SiteSetting.onebox_domains_whitelist.present?
      domains = SiteSetting.onebox_domains_whitelist.split('|')
      whitelist = Onebox::Engine::WhitelistedGenericOnebox.whitelist
      whitelist.concat(domains)
      whitelist.uniq!
    end
  end

  # Returns a hash with :onebox and :preview strings for +url+, computing it
  # through Onebox.preview on a cache miss and caching it for one day.
  # Any StandardError is reported via Discourse.handle_job_exception and
  # answered with blank strings.
  def self.onebox_raw(url)
    Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
      # This might be able to move to whenever the SiteSetting changes?
      Oneboxer.add_discourse_whitelists

      r = Onebox.preview(url, cache: {}, max_width: 695)
      {
        onebox: r.to_s,
        preview: r.try(:placeholder_html).to_s
      }
    end
  rescue => e
    Discourse.handle_job_exception(e, message: "While trying to onebox a URL", url: url)
    # return a blank hash, so rest of the code works
    {preview: "", onebox: ""}
  end
end
|
2014-01-27 15:09:09 -05:00
|
|
|
|