2021-05-26 05:41:35 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Onebox
|
2022-06-13 11:32:34 -04:00
|
|
|
class OpenGraph < Normalizer
|
2021-05-26 05:41:35 -04:00
|
|
|
# Builds the normalizer from a parsed document.
#
# The document is handed to #extract and the resulting hash is kept in @data.
#
# @param doc the parsed document to read metadata from (may be blank)
def initialize(doc)
  @data = extract(doc)
end
|
|
|
|
|
|
|
|
# Title of the page, looked up through `get` with the :title key.
#
# `get` is not defined in this file (presumably inherited from Normalizer);
# 80 is passed as its second argument, presumably a length limit — confirm
# against the helper's definition.
def title
  get(:title, 80)
end
|
|
|
|
|
|
|
|
# Renders the title as an HTML `title` attribute fragment.
#
# NOTE(review): the title is interpolated as-is inside single quotes; whether
# it is already escaped depends on `get`, which is not visible here — confirm.
#
# @return [String] "title='<title>'" when #title is non-nil, otherwise "".
def title_attr
  return "" if title.nil?

  "title='#{title}'"
end
|
|
|
|
|
|
|
|
# Builds an HTTPS version of the :image URL.
#
# The scheme is overwritten with "https" regardless of what the page declared,
# which also covers scheme-less (protocol-relative) URLs.
#
# @return [String, nil] the rewritten URL, or nil when there is no image value.
# @raise [URI::InvalidURIError] if the image value cannot be parsed as a URI.
def secure_image_url
  image = get(:image)
  # URI(nil) raises ArgumentError and URI("") would produce the meaningless
  # "https:", so report "no image" explicitly instead.
  return if image.to_s.empty?

  secure_url = URI(image)
  secure_url.scheme = "https"
  secure_url.to_s
end
|
|
|
|
|
|
|
|
private

# Keys whose values are additionally accumulated into a pluralised array
# entry ("<key>s") by #extract. Frozen so the shared list cannot be mutated
# at runtime. (Note: `private` does not affect constant visibility.)
COLLECTIONS = %i[article_section article_section_color article_tag].freeze
|
|
|
|
|
2021-05-26 05:41:35 -04:00
|
|
|
# Collects Open Graph style metadata from a parsed document into a hash.
#
# Every <meta> element whose `property` (checked first) or `name` attribute
# looks like "og:…", "article:…" or "product:…" contributes one entry. The
# key is the part after the prefix with "-" and ":" turned into "_"; the value
# is the element's `content` (or `value`) attribute. The first non-blank value
# seen for a key wins. Keys listed in COLLECTIONS are additionally gathered,
# in document order, into an array stored under "<key>s".
#
# When no title was found in the meta elements, the text of the <title>
# element is used as a fallback.
#
# @param doc a document responding to `css` and `at_css`
# @return [Hash{Symbol => String, Array<String>}] empty when doc is blank
def extract(doc)
  return {} if Onebox::Helpers.blank?(doc)

  data = {}

  doc.css("meta").each do |meta|
    key = open_graph_key(meta)
    next unless key

    value = (meta["content"] || meta["value"]).to_s
    next if Onebox::Helpers.blank?(value)

    data[key] ||= value
    next unless COLLECTIONS.include?(key)

    plural = :"#{key}s"
    # A literal "<key>s" meta seen earlier may have stored a String here;
    # coerce to an Array so `<<` never appends text onto that string.
    data[plural] = Array(data[plural]) << value
  end

  # Fall back to the <title> element when the meta tags supplied no title.
  title_text = doc.at_css("title")&.text
  data[:title] ||= title_text if title_text && !Onebox::Helpers.blank?(title_text)

  data
end

# Derives the data key for a <meta> element, or nil when neither its
# `property` nor its `name` attribute carries an og:/article:/product: prefix.
def open_graph_key(meta)
  %w[property name].each do |attribute|
    match = /\A(?:og|article|product):(.+)\z/i.match(meta[attribute].to_s)
    return match[1].tr("-:", "_").to_sym if match
  end

  nil
end
|
|
|
|
end
|
|
|
|
end
|