2021-05-26 05:41:35 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2022-06-13 11:32:34 -04:00
|
|
|
require "cgi"
|
|
|
|
require "onebox/normalizer"
|
|
|
|
require "onebox/open_graph"
|
2021-05-26 05:41:35 -04:00
|
|
|
require "onebox/oembed"
|
2022-06-13 11:32:34 -04:00
|
|
|
require "onebox/json_ld"
|
2021-05-26 05:41:35 -04:00
|
|
|
|
|
|
|
module Onebox
  module Engine
    # Mixin that assembles a flat hash of page metadata (see #raw) from
    # OpenGraph tags, Twitter card meta tags, an oEmbed response, JSON-LD, the
    # favicon link and the description meta tag.
    #
    # The including class must provide `url`, `uri` and `options`, and an
    # ancestor must implement `always_https?` (it is reached through `super`);
    # none of these is defined in this module.
    module StandardEmbed
      # Registry mapping a URL pattern to the oEmbed endpoint that is queried
      # for matching URLs. Created lazily on first access.
      def self.oembed_providers
        @@oembed_providers ||= {}
      end

      # Registers (or replaces) the oEmbed endpoint used for URLs matching
      # +regexp+. The matching page URL is appended to +endpoint+ as the `url`
      # query parameter (see #get_oembed_url).
      def self.add_oembed_provider(regexp, endpoint)
        oembed_providers[regexp] = endpoint
      end

      # List of URL patterns registered through add_opengraph_provider.
      # Created lazily on first access; it is only stored here, not consulted
      # anywhere in this module.
      def self.opengraph_providers
        @@opengraph_providers ||= []
      end

      # Appends +regexp+ to the OpenGraph provider list.
      def self.add_opengraph_provider(regexp)
        opengraph_providers << regexp
      end

      # Some oembed providers (like meetup.com) don't provide links to themselves
      add_oembed_provider(%r{www\.meetup\.com/}, "http://api.meetup.com/oembed")
      add_oembed_provider(%r{www\.mixcloud\.com/}, "https://www.mixcloud.com/oembed/")
      # In order to support Private Videos
      add_oembed_provider(%r{vimeo\.com/}, "https://vimeo.com/api/oembed.json")
      # NYT requires login so use oembed only
      add_oembed_provider(%r{nytimes\.com/}, "https://www.nytimes.com/svc/oembed/json/")

      # True-ish when the host of `uri` matches
      # AllowlistedGenericOnebox.https_hosts; otherwise defers to the
      # ancestor's implementation.
      def always_https?
        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
      end

      # Builds the metadata hash once and returns it on every call.
      #
      # Order defines precedence: OpenGraph, Twitter, oEmbed and JSON-LD only
      # fill keys that are still unset, the favicon always overwrites
      # :favicon, and the description meta tag is used only when no earlier
      # source supplied :description.
      def raw
        return @raw if defined?(@raw)

        # Assigned before it is populated, so the hash is memoized even if one
        # of the setters below raises part-way through.
        @raw = {}

        set_opengraph_data_on_raw
        set_twitter_data_on_raw
        set_oembed_data_on_raw
        set_json_ld_data_on_raw
        set_favicon_data_on_raw
        set_description_on_raw

        @raw
      end

      protected

      # Fetches the page once (result memoized, including a nil result),
      # sending `options[:cookie]` as a Cookie header when it is set.
      def html_doc
        return @html_doc if defined?(@html_doc)

        # `headers` stays nil when no cookie option is given.
        headers = { "Cookie" => options[:cookie] } if options[:cookie]

        @html_doc = Onebox::Helpers.fetch_html_doc(url, headers)
      end

      # Memoized Onebox::Oembed built from the oEmbed response body.
      def get_oembed
        @oembed ||= Onebox::Oembed.new(get_json_response)
      end

      # Memoized Onebox::OpenGraph built from the fetched page.
      def get_opengraph
        @opengraph ||= ::Onebox::OpenGraph.new(html_doc)
      end

      # Collects `twitter:*` meta tags from the page.
      #
      # A tag is recognised by its `property` attribute first, then by `name`.
      # The part after "twitter:" becomes the key, with "-" and ":" replaced
      # by "_" and converted to a symbol. The value comes from `content`,
      # falling back to `value`. The first non-blank value per key wins.
      #
      # Returns a Hash (empty when there is no document).
      def get_twitter
        return {} unless html_doc

        tag_pattern = /^twitter:(.+)$/i

        html_doc.css("meta").each_with_object({}) do |meta, twitter|
          suffix = meta["property"].to_s[tag_pattern, 1] || meta["name"].to_s[tag_pattern, 1]
          next unless suffix

          value = (meta["content"] || meta["value"]).to_s
          # "0 minutes" is skipped explicitly; presumably a placeholder
          # reading-time value emitted by some sites - TODO confirm.
          next unless value.present? && value != "0 minutes"

          twitter[suffix.tr("-:", "_").to_sym] ||= value
        end
      end

      # Looks up the first favicon-style <link> element and passes its
      # whitespace-stripped href (nil when the element or attribute is
      # missing) together with the page url to
      # Onebox::Helpers.get_absolute_image_url.
      #
      # Returns nil when there is no document.
      def get_favicon
        return nil unless html_doc

        icon_link =
          html_doc.css(
            'link[rel="shortcut icon"], link[rel="icon shortcut"], link[rel="shortcut"], link[rel="icon"]',
          ).first
        href = icon_link && icon_link["href"]

        Onebox::Helpers.get_absolute_image_url(href&.strip, url)
      end

      # Content of <meta name="description">, falling back to the capitalised
      # name="Description" variant. Returns nil when neither is present or
      # there is no document.
      def get_description
        return nil unless html_doc

        html_doc.at("meta[name='description']").to_h["content"] ||
          html_doc.at("meta[name='Description']").to_h["content"]
      end

      # Fetches the oEmbed response for this page.
      #
      # Returns the string "{}" when no oEmbed URL is known, when fetching it
      # raises any StandardError, or when resolving the oEmbed URL raises one
      # of the connection/parsing errors listed in the outer rescue. Other
      # errors raised while resolving the URL propagate.
      def get_json_response
        oembed_url = get_oembed_url

        return "{}" if oembed_url.blank?

        begin
          Onebox::Helpers.fetch_response(oembed_url)
        rescue StandardError
          "{}"
        end
      rescue Errno::ECONNREFUSED, Net::HTTPError, Net::HTTPFatalError, MultiJson::LoadError
        "{}"
      end

      # Resolves the oEmbed URL for this page.
      #
      # 1. The first registered provider whose pattern matches `url` wins; the
      #    page URL is URL-encoded into the `url` query parameter so that its
      #    own "?" and "&" cannot be mistaken for endpoint parameters.
      # 2. Otherwise the page's <link type="application/json+oembed"> href is
      #    used, then <link type="text/json+oembed">. A blank href from the
      #    first link does not block the second.
      #
      # Returns the URL string, or nil/blank when nothing was found.
      def get_oembed_url
        provider = StandardEmbed.oembed_providers.find { |regexp, _endpoint| url =~ regexp }
        oembed_url = "#{provider.last}?url=#{CGI.escape(url)}" if provider

        # html_doc is consulted even when a provider matched, as before.
        if html_doc
          %w[application/json+oembed text/json+oembed].each do |link_type|
            break unless oembed_url.blank?

            href = html_doc.at("//link[@type='#{link_type}']/@href")
            oembed_url = href.value if href
          end
        end

        oembed_url
      end

      # Memoized Onebox::JsonLd built from the fetched page.
      def get_json_ld
        @json_ld ||= Onebox::JsonLd.new(html_doc)
      end

      # Copies every attribute listed in `normalizer.data` into @raw without
      # overwriting existing truthy entries. The value is read through the
      # normalizer's method of the same name rather than taken from `data`
      # directly; nil results are skipped.
      def set_from_normalizer_data(normalizer)
        normalizer.data.each do |key, _stored|
          # NOTE(review): keys appear to originate from remote page metadata;
          # confirm the normalizers restrict them before they reach `send`.
          value = normalizer.send(key)
          @raw[key] ||= value unless value.nil?
        end
      end

      # Merges OpenGraph data into @raw, then drops the :title_attr key.
      def set_opengraph_data_on_raw
        og = get_opengraph
        set_from_normalizer_data(og)
        @raw.except!(:title_attr)
      end

      # Merges non-blank Twitter card values into @raw without overwriting.
      def set_twitter_data_on_raw
        twitter = get_twitter
        twitter.each { |k, v| @raw[k] ||= v if v.present? }
      end

      # Merges oEmbed data into @raw without overwriting.
      def set_oembed_data_on_raw
        oembed = get_oembed
        set_from_normalizer_data(oembed)
      end

      # Merges JSON-LD data into @raw without overwriting.
      def set_json_ld_data_on_raw
        json_ld = get_json_ld
        set_from_normalizer_data(json_ld)
      end

      # Stores the favicon URL under :favicon, replacing any earlier value,
      # when one was found.
      def set_favicon_data_on_raw
        favicon = get_favicon
        @raw[:favicon] = favicon if favicon.present?
      end

      # Uses the description meta tag only when no other source has supplied
      # a :description.
      def set_description_on_raw
        return if @raw[:description]

        description = get_description
        @raw[:description] = description if description.present?
      end
    end
  end
end
|