discourse/lib/onebox/engine/standard_embed.rb

198 lines
5.1 KiB
Ruby

# frozen_string_literal: true
require "cgi"
require "onebox/normalizer"
require "onebox/open_graph"
require "onebox/oembed"
require "onebox/json_ld"
module Onebox
module Engine
module StandardEmbed
# Registry of explicitly configured oEmbed endpoints: a Hash mapping a URL
# regexp to the endpoint that serves matching URLs. Created empty on first
# access and kept in a class variable.
def self.oembed_providers
  return @@oembed_providers if defined?(@@oembed_providers) && @@oembed_providers

  @@oembed_providers = {}
end
# Registers +endpoint+ as the oEmbed endpoint for URLs matching +regexp+,
# replacing any endpoint already stored under the same regexp.
# Returns +endpoint+.
def self.add_oembed_provider(regexp, endpoint)
  oembed_providers.store(regexp, endpoint)
end
# List of URL regexps registered through add_opengraph_provider. Created
# empty on first access and kept in a class variable.
def self.opengraph_providers
  return @@opengraph_providers if defined?(@@opengraph_providers) && @@opengraph_providers

  @@opengraph_providers = []
end
# Appends +regexp+ to the list returned by opengraph_providers.
# Returns that list.
def self.add_opengraph_provider(regexp)
  opengraph_providers.push(regexp)
end
# Built-in oEmbed endpoints, registered when this module is loaded.
# get_oembed_url consults this registry before looking for an oEmbed
# discovery <link> in the fetched page.
#
# Some oEmbed providers (like meetup.com) don't provide links to themselves,
# so their endpoints are registered explicitly.
add_oembed_provider(%r{www\.meetup\.com/}, "http://api.meetup.com/oembed")
add_oembed_provider(%r{www\.mixcloud\.com/}, "https://www.mixcloud.com/oembed/")
# Registered explicitly in order to support private videos.
add_oembed_provider(%r{vimeo\.com/}, "https://vimeo.com/api/oembed.json")
# NYT requires login, so rely on its oEmbed endpoint.
add_oembed_provider(%r{nytimes\.com/}, "https://www.nytimes.com/svc/oembed/json/")
# Truthy when AllowlistedGenericOnebox.host_matches accepts this engine's
# +uri+ against AllowlistedGenericOnebox.https_hosts; otherwise defers to
# the inherited always_https?.
def always_https?
  allowlisted = AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
  return allowlisted if allowlisted

  super
end
# Collects the page's metadata into a single Hash, memoized in @raw.
#
# The sources are applied in a fixed order. The OpenGraph, Twitter, oEmbed
# and JSON-LD steps only fill keys that are still empty, so earlier sources
# win. The favicon step always sets :favicon when one is found, and the
# description step only runs its lookup when :description is still missing.
def raw
  unless defined?(@raw)
    @raw = {}
    %i[
      set_opengraph_data_on_raw
      set_twitter_data_on_raw
      set_oembed_data_on_raw
      set_json_ld_data_on_raw
      set_favicon_data_on_raw
      set_description_on_raw
    ].each { |step| send(step) }
  end

  @raw
end
protected
# Fetches the document for +url+ through Onebox::Helpers.fetch_html_doc and
# memoizes the result (including a nil result) in @html_doc. When
# options[:cookie] is set it is sent as a "Cookie" request header;
# otherwise no headers are passed.
def html_doc
  unless defined?(@html_doc)
    cookie = options[:cookie]
    request_headers = cookie ? { "Cookie" => cookie } : nil
    @html_doc = Onebox::Helpers.fetch_html_doc(url, request_headers)
  end

  @html_doc
end
# Memoized Onebox::Oembed built from the oEmbed response body returned by
# get_json_response.
def get_oembed
  return @oembed if @oembed

  @oembed = Onebox::Oembed.new(get_json_response)
end
# Memoized Onebox::OpenGraph built from the fetched HTML document.
def get_opengraph
  return @opengraph if @opengraph

  @opengraph = ::Onebox::OpenGraph.new(html_doc)
end
# Extracts Twitter card metadata from the page's <meta> tags.
#
# A tag counts when its "property" or, failing that, its "name" attribute
# has the form "twitter:<key>". The key has "-" and ":" replaced by "_" and
# is symbolized; the value comes from "content" or, failing that, "value".
# Blank values and the literal "0 minutes" are skipped, and the first value
# seen for a key wins.
#
# Returns a Hash (empty when there is no document).
def get_twitter
  return {} unless html_doc

  pattern = /^twitter:(.+)$/i
  html_doc.css("meta").each_with_object({}) do |meta, cards|
    hit = meta["property"]&.match(pattern) || meta["name"]&.match(pattern)
    next unless hit

    value = (meta["content"] || meta["value"]).to_s
    next unless value.present? && value != "0 minutes"

    cards[hit[1].tr("-:", "_").to_sym] ||= value
  end
end
# Resolves the page's favicon URL.
#
# Takes the first <link> whose rel is "shortcut icon", "icon shortcut",
# "shortcut" or "icon", strips whitespace from its href and passes it with
# the page +url+ to Onebox::Helpers.get_absolute_image_url. That helper is
# still called with a nil href when no usable link exists.
#
# Returns nil when there is no document.
def get_favicon
  return nil unless html_doc

  selector =
    'link[rel="shortcut icon"], link[rel="icon shortcut"], link[rel="shortcut"], link[rel="icon"]'
  icon_link = html_doc.css(selector).first
  href = icon_link["href"] unless icon_link.nil?
  href = href.strip unless href.nil?

  Onebox::Helpers.get_absolute_image_url(href, url)
end
# Reads the page description from <meta name="description">, falling back
# to <meta name="Description"> when the first lookup yields nothing.
# Returns the "content" attribute, or nil when neither tag provides one or
# there is no document.
def get_description
  return nil unless html_doc

  lowercase = html_doc.at("meta[name='description']").to_h["content"]
  return lowercase if lowercase

  html_doc.at("meta[name='Description']").to_h["content"]
end
# Fetches the raw oEmbed response body for this URL.
#
# Returns "{}" when no oEmbed endpoint is known or when the fetch raises
# any StandardError. The method-level rescue covers the listed errors
# raised while the endpoint URL is being resolved, before the fetch starts.
def get_json_response
  endpoint_url = get_oembed_url

  if endpoint_url.blank?
    "{}"
  else
    begin
      Onebox::Helpers.fetch_response(endpoint_url)
    rescue StandardError
      "{}"
    end
  end
rescue Errno::ECONNREFUSED, Net::HTTPError, Net::HTTPFatalError, MultiJson::LoadError
  "{}"
end
# Works out which oEmbed endpoint URL to query for +url+.
#
# Endpoints registered through StandardEmbed.add_oembed_provider take
# precedence. Otherwise the fetched page is searched for an oEmbed
# discovery <link>: "application/json+oembed" first, then
# "text/json+oembed".
#
# Returns the endpoint URL as a String, or nil when none is found.
def get_oembed_url
  provider = StandardEmbed.oembed_providers.find { |regexp, _endpoint| url =~ regexp }
  if provider
    # The target URL is a query-string value, so it must be escaped;
    # otherwise its own "?", "&" and "#" parts are read as belonging to the
    # endpoint request and the provider receives a truncated URL.
    return "#{provider.last}?url=#{CGI.escape(url)}"
  end

  return nil unless html_doc

  [
    "//link[@type='application/json+oembed']/@href",
    "//link[@type='text/json+oembed']/@href",
  ].each do |xpath|
    href = html_doc.at(xpath)
    next unless href

    # A blank href counts as absent so the next link type is still tried.
    return href.value unless href.value.blank?
  end

  nil
end
# Memoized Onebox::JsonLd built from the fetched HTML document.
def get_json_ld
  return @json_ld if @json_ld

  @json_ld = Onebox::JsonLd.new(html_doc)
end
# Copies every attribute listed in +normalizer.data+ into @raw.
#
# Each value is read by calling the accessor named after the key on the
# normalizer, not taken from the data Hash directly. Nil values are
# skipped, and keys already set in @raw are left untouched.
def set_from_normalizer_data(normalizer)
  normalizer.data.each do |key, _stored|
    # The normalizers here are built from the fetched page or the oEmbed
    # response, so keys are presumably derived from remote content.
    # public_send keeps such a key from invoking a private method
    # (including Kernel's) on the normalizer.
    value = normalizer.public_send(key)
    @raw[key] ||= value unless value.nil?
  end
end
# Merges OpenGraph attributes into @raw, then removes the :title_attr key
# from it.
def set_opengraph_data_on_raw
  set_from_normalizer_data(get_opengraph)
  @raw.except!(:title_attr)
end
# Fills @raw with Twitter card values for keys that are not set yet,
# ignoring blank values.
def set_twitter_data_on_raw
  get_twitter.each do |key, value|
    next unless value.present?

    @raw[key] ||= value
  end
end
# Merges the oEmbed attributes into @raw.
def set_oembed_data_on_raw
  set_from_normalizer_data(get_oembed)
end
# Merges the JSON-LD attributes into @raw.
def set_json_ld_data_on_raw
  set_from_normalizer_data(get_json_ld)
end
# Stores the page favicon under @raw[:favicon], overwriting any existing
# value. Does nothing when no favicon is found.
def set_favicon_data_on_raw
  icon = get_favicon
  return unless icon.present?

  @raw[:favicon] = icon
end
# Falls back to the page's <meta> description when no earlier source has
# provided @raw[:description].
def set_description_on_raw
  return if @raw[:description]

  fallback = get_description
  return unless fallback.present?

  @raw[:description] = fallback
end
end
end
end