# frozen_string_literal: true
require 'uri'
# Load every bundled onebox engine file; the list is sorted before requiring
# so the load order does not depend on filesystem enumeration order.
Dir["#{Rails.root}/lib/onebox/engine/*_onebox.rb"].sort.each { |f| require f }
# Builds "onebox" HTML for links. URLs that route to this site (uploads,
# topics, users) are rendered in-process; every other URL is resolved with
# FinalDestination and rendered by Onebox.preview, cached in Discourse.cache.
module Oneboxer
  ONEBOX_CSS_CLASS = "onebox"

  # Both patterns are matched against File.extname output (leading dot
  # included). \A/\z anchor the whole string; ^/$ would also accept input
  # containing embedded newlines.
  AUDIO_REGEX = /\A\.(mp3|og[ga]|opus|wav|m4[abpr]|aac|flac)\z/i
  VIDEO_REGEX = /\A\.(mov|mp4|webm|m4v|3gp|ogv|avi|mpeg)\z/i

  # keep reloaders happy
  unless defined? Oneboxer::Result
    # Return value of Oneboxer.apply: the processed document plus a flag
    # telling whether any link was replaced.
    Result = Struct.new(:doc, :changed) do
      def to_html
        doc.to_html
      end

      def changed?
        changed
      end
    end
  end

  # Passed to FinalDestination as `ignore_redirects` (see external_onebox).
  def self.ignore_redirects
    @ignore_redirects ||= ['http://www.dropbox.com', 'http://store.steampowered.com', 'http://vimeo.com', Discourse.base_url]
  end

  # Passed to FinalDestination as `force_get_hosts` (see external_onebox).
  def self.force_get_hosts
    @force_get_hosts ||= ['http://us.battle.net']
  end

  # Pipe-separated site setting, split into a list on every call.
  def self.force_custom_user_agent_hosts
    SiteSetting.force_custom_user_agent_hosts.split('|')
  end

  # Post types that local_topic_html is willing to render.
  def self.allowed_post_types
    @allowed_post_types ||= [Post.types[:regular], Post.types[:moderator_action]]
  end

  # Returns the :preview HTML for url. With options[:invalidate_oneboxes]
  # set, cached entries for the url are deleted first.
  def self.preview(url, options = nil)
    options ||= {}
    invalidate(url) if options[:invalidate_oneboxes]
    onebox_raw(url, options)[:preview]
  end

  # Returns the :onebox HTML for url. With options[:invalidate_oneboxes]
  # set, cached entries for the url are deleted first.
  def self.onebox(url, options = nil)
    options ||= {}
    invalidate(url) if options[:invalidate_oneboxes]
    onebox_raw(url, options)[:onebox]
  end

  # Cache-only lookup of the onebox HTML. Returns nil when nothing is cached.
  # If reading the entry raises, the entry is invalidated, a warning is
  # logged and "" is returned.
  def self.cached_onebox(url)
    if c = Discourse.cache.read(onebox_cache_key(url))
      c[:onebox]
    end
  rescue => e
    invalidate(url)
    Rails.logger.warn("invalid cached onebox for #{url} #{e}")
    ""
  end

  # Cache-only lookup of the preview HTML; same nil / "" contract as
  # cached_onebox.
  def self.cached_preview(url)
    if c = Discourse.cache.read(onebox_cache_key(url))
      c[:preview]
    end
  rescue => e
    invalidate(url)
    Rails.logger.warn("invalid cached preview for #{url} #{e}")
    ""
  end

  # Deletes both the rendered entry and the "recently failed" marker for url.
  def self.invalidate(url)
    Discourse.cache.delete(onebox_cache_key(url))
    Discourse.cache.delete(onebox_failed_cache_key(url))
  end

  # Parse URLs out of HTML, returning the document when finished.
  # Yields (href, element) for each `a.onebox` node, plus any node matched by
  # the extra_paths selectors, whose href is present. A String argument is
  # parsed as an HTML5 fragment first.
  def self.each_onebox_link(string_or_doc, extra_paths: [])
    doc = string_or_doc
    doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)

    doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths).each do |link|
      yield(link['href'], link) if link['href'].present?
    end

    doc
  end

  HTML5_BLOCK_ELEMENTS ||= %w{address article aside blockquote canvas center dd div dl dt fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr li main nav noscript ol output p pre section table tfoot ul video}

  # Replaces each onebox link in the document with the HTML the caller's
  # block returns for it (the first element of the block's return value is
  # used; a falsy value leaves the link untouched).
  # Returns a Result wrapping the document and whether anything was swapped.
  def self.apply(string_or_doc, extra_paths: nil)
    doc = string_or_doc
    doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
    changed = false

    each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
      onebox, _ = yield(url, element)
      next unless onebox

      parsed_onebox = Nokogiri::HTML5::fragment(onebox)
      next unless parsed_onebox.children.count > 0

      # When the link is the only child of a <p> and the onebox starts with a
      # block-level element, replace the whole paragraph instead of nesting
      # the block element inside it.
      if element&.parent&.node_name&.downcase == "p" &&
          element.parent.children.count == 1 &&
          HTML5_BLOCK_ELEMENTS.include?(parsed_onebox.children[0].node_name.downcase)
        element = element.parent
      end

      changed = true
      element.swap parsed_onebox.to_html
    end

    # strip empty <p> elements (but never the document's only child)
    doc.css("p").each do |paragraph|
      paragraph.remove if paragraph.children.empty? && doc.children.count > 1
    end

    Result.new(doc, changed)
  end

  # True while the redis preview flag for user_id holds "1".
  def self.is_previewing?(user_id)
    Discourse.redis.get(preview_key(user_id)) == "1"
  end

  # Sets the preview flag for user_id with a one minute expiry.
  def self.preview_onebox!(user_id)
    Discourse.redis.setex(preview_key(user_id), 1.minute, "1")
  end

  # Clears the preview flag for user_id.
  def self.onebox_previewed!(user_id)
    Discourse.redis.del(preview_key(user_id))
  end

  # Returns whatever Onebox::Matcher selects for url.
  def self.engine(url)
    Onebox::Matcher.new(url).oneboxed
  end

  # True when a failure marker for url is present in the cache.
  def self.recently_failed?(url)
    Discourse.cache.read(onebox_failed_cache_key(url)).present?
  end

  # Writes a failure marker for url that expires after one hour.
  def self.cache_failed!(url)
    Discourse.cache.write(onebox_failed_cache_key(url), true, expires_in: 1.hour)
  end

  # NOTE: a bare `private` does not apply to methods defined with `def self.`,
  # so everything below is still publicly callable. It is kept only as a
  # marker that these are internal helpers; visibility is left unchanged so
  # existing callers keep working.
  private

  def self.preview_key(user_id)
    "onebox:preview:#{user_id}"
  end

  # Result shape returned when nothing could be rendered.
  def self.blank_onebox
    { preview: "", onebox: "" }
  end

  def self.onebox_cache_key(url)
    "onebox__#{url}"
  end

  def self.onebox_failed_cache_key(url)
    "onebox_failed__#{url}"
  end

  # Normalizes url through URI, then tries a local onebox before falling back
  # to an external one. Any StandardError is logged at info level and turned
  # into blank_onebox, so callers always get a hash with :onebox and :preview.
  def self.onebox_raw(url, opts = {})
    url = URI(url).to_s
    local_onebox(url, opts) || external_onebox(url)
  rescue => e
    # no point warning here, just cause we have an issue oneboxing a url
    # we can later hunt for failed oneboxes by searching logs if needed
    Rails.logger.info("Failed to onebox #{url} #{e} #{e.backtrace}")
    # return a blank hash, so rest of the code works
    blank_onebox
  end

  # Renders url in-process when Discourse.route_for recognizes it; returns
  # nil otherwise. When the route matches but no HTML was produced, a plain
  # link to the url is used instead.
  def self.local_onebox(url, opts = {})
    return unless route = Discourse.route_for(url)

    html =
      case route[:controller]
      when "uploads" then local_upload_html(url)
      when "topics" then local_topic_html(url, route, opts)
      when "users" then local_user_html(url, route)
      end

    if html.blank?
      # The url sits inside a single-quoted attribute and `'` is a legal URI
      # character, so it has to be escaped before interpolation.
      escaped_url = CGI.escapeHTML(url)
      html = "<a href='#{escaped_url}'>#{escaped_url}</a>"
    end

    { onebox: html, preview: html }
  end

  # Returns <video>/<audio> markup when the url path has a known media
  # extension, nil otherwise.
  def self.local_upload_html(url)
    # Escaped because the url is interpolated into single-quoted attributes
    # and element text below.
    escaped_url = CGI.escapeHTML(url)

    case File.extname(URI(url).path || "")
    when VIDEO_REGEX
      <<~HTML
        <div class="onebox video-onebox">
          <video width="100%" height="100%" controls="">
            <source src='#{escaped_url}'>
            <a href='#{escaped_url}'>#{escaped_url}</a>
          </video>
        </div>
      HTML
    when AUDIO_REGEX
      "<audio controls><source src='#{escaped_url}'><a href='#{escaped_url}'>#{escaped_url}</a></audio>"
    end
  end

  # Returns the topic referenced by route, or nil when it must not be shown.
  # opts[:user_id], opts[:category_id] and opts[:topic_id] describe where the
  # onebox is being rendered.
  def self.local_topic(url, route, opts)
    if current_user = User.find_by(id: opts[:user_id])
      if current_category = Category.find_by(id: opts[:category_id])
        return unless Guardian.new(current_user).can_see_category?(current_category)
      end

      if current_topic = Topic.find_by(id: opts[:topic_id])
        return unless Guardian.new(current_user).can_see_topic?(current_topic)
      end
    end

    return unless topic = Topic.find_by(id: route[:id] || route[:topic_id])
    return if topic.private_message?

    # current_category is nil when no user or category was found above.
    # Unless the linked topic is in that same category, it must be visible
    # to a Guardian built without a user.
    if current_category.blank? || current_category.id != topic.category_id
      return unless Guardian.new.can_see_topic?(topic)
    end

    topic
  end

  # Renders a local topic/post link: a cooked [quote] when the link targets a
  # reply in the topic named by opts[:topic_id], the topic onebox template
  # otherwise. Returns nil when the topic or post may not be shown.
  def self.local_topic_html(url, route, opts)
    return unless topic = local_topic(url, route, opts)

    post_number = route[:post_number].to_i

    post = post_number > 1 ?
      topic.posts.where(post_number: post_number).first :
      topic.ordered_posts.first

    return if !post || post.hidden || !allowed_post_types.include?(post.post_type)

    if post_number > 1 && opts[:topic_id] == topic.id
      excerpt = post.excerpt(SiteSetting.post_onebox_maxlength)
      excerpt.gsub!(/[\r\n]+/, " ")
      excerpt.gsub!("[/quote]", "[quote]") # don't break my quote
      quote = "[quote=\"#{post.user.username}, topic:#{topic.id}, post:#{post.post_number}\"]\n#{excerpt}\n[/quote]"
      PrettyText.cook(quote)
    else
      args = {
        topic_id: topic.id,
        post_number: post.post_number,
        avatar: PrettyText.avatar_img(post.user.avatar_template, "tiny"),
        original_url: url,
        title: PrettyText.unescape_emoji(CGI::escapeHTML(topic.title)),
        category_html: CategoryBadge.html_for(topic.category),
        quote: PrettyText.unescape_emoji(post.excerpt(SiteSetting.post_onebox_maxlength)),
      }

      template = File.read("#{Rails.root}/lib/onebox/templates/discourse_topic_onebox.mustache")
      Mustache.render(template, args)
    end
  end

  # Renders the user onebox template for route[:username]; nil when no user
  # has that (lower-cased) username.
  def self.local_user_html(url, route)
    username = route[:username] || ""
    user = User.find_by(username_lower: username.downcase)
    return unless user

    # name stays nil unless the enable_names site setting is on
    name = user.name if SiteSetting.enable_names

    args = {
      user_id: user.id,
      username: user.username,
      avatar: PrettyText.avatar_img(user.avatar_template, "extra_large"),
      name: name,
      bio: user.user_profile.bio_excerpt(230),
      location: Onebox::Helpers.sanitize(user.user_profile.location),
      joined: I18n.t('joined'),
      created_at: user.created_at.strftime(I18n.t('datetime_formats.formats.date_only')),
      website: user.user_profile.website,
      website_name: UserSerializer.new(user).website_name,
      original_url: url
    }

    template = File.read("#{Rails.root}/lib/onebox/templates/discourse_user_onebox.mustache")
    Mustache.render(template, args)
  end

  # Pipe-separated site setting, split into a list on every call.
  def self.blocked_domains
    SiteSetting.blocked_onebox_domains.split("|")
  end

  # Passed to FinalDestination as `preserve_fragment_url_hosts`.
  def self.preserve_fragment_url_hosts
    @preserve_fragment_url_hosts ||= ['http://github.com']
  end

  # Resolves url with FinalDestination and renders it with Onebox.preview.
  # The resulting hash is stored in Discourse.cache for one day.
  def self.external_onebox(url)
    Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
      fd = FinalDestination.new(url,
                                ignore_redirects: ignore_redirects,
                                ignore_hostnames: blocked_domains,
                                force_get_hosts: force_get_hosts,
                                force_custom_user_agent_hosts: force_custom_user_agent_hosts,
                                preserve_fragment_url_hosts: preserve_fragment_url_hosts)
      uri = fd.resolve

      # `return` inside this block exits external_onebox itself rather than
      # handing a value back to cache.fetch.
      # NOTE(review): String#match? treats each blocked entry as a regex
      # pattern, so "example.com" also matches "notexample.com" and "." matches
      # any character - confirm whether exact/subdomain matching is intended.
      return blank_onebox if uri.blank? || blocked_domains.any? { |hostname| uri.hostname.match?(hostname) }

      options = {
        max_width: 695,
        sanitize_config: Onebox::DiscourseOneboxSanitizeConfig::Config::DISCOURSE_ONEBOX,
        hostname: GlobalSetting.hostname,
      }

      options[:cookie] = fd.cookie if fd.cookie

      r = Onebox.preview(uri.to_s, options)

      { onebox: r.to_s, preview: r&.placeholder_html.to_s }
    end
  end

end