# frozen_string_literal: true

require 'rails_helper'

# Specs for PostAnalyzer: cooking raw post text and the values derived from
# it (raw links, linked hosts, embedded media count, link count, mentions).
describe PostAnalyzer do
  let(:default_topic_id) { 12 }
  let(:url) { 'https://twitter.com/evil_trout/status/345954894420787200' }

  # Examples for #cook: onebox cache lookup / invalidation and the effect of
  # the :cook_method option on the returned markup.
  describe '#cook' do
    let(:post_analyzer) { PostAnalyzer.new(nil, nil) }

    let(:raw) { "Here's a tweet:\n#{url}" }
    let(:options) { {} }

    # Oneboxer.onebox is stubbed for every example in this group.
    before { Oneboxer.stubs(:onebox) }

    it 'fetches the cached onebox for any urls in the post' do
      Oneboxer.expects(:cached_onebox).with(url).returns('something')

      post_analyzer.cook(raw, options)

      expect(post_analyzer.found_oneboxes?).to be(true)
    end

    it 'does not invalidate the onebox cache' do
      Oneboxer.expects(:invalidate).with(url).never

      post_analyzer.cook(raw, options)
    end

    context 'when invalidating oneboxes' do
      let(:options) { { invalidate_oneboxes: true } }

      it 'invalidates the oneboxes for urls in the post' do
        Oneboxer.expects(:invalidate).with(url)
        InlineOneboxer.expects(:invalidate).with(url)

        post_analyzer.cook(raw, options)
      end
    end

    it "does nothing when the cook_method is 'raw_html'" do
      cooked = post_analyzer.cook('Hello <div/> world', cook_method: Post.cook_methods[:raw_html])
      expect(cooked).to eq('Hello <div/> world')
    end

    it "does not interpret Markdown when cook_method is 'email' and raw contains plaintext" do
      cooked = post_analyzer.cook(
        "[plaintext]\n*this is not italic* and here is a link: https://www.example.com\n[/plaintext]",
        cook_method: Post.cook_methods[:email]
      )

      expect(cooked).to eq('*this is not italic* and here is a link: <a href="https://www.example.com">https://www.example.com</a>')
    end

    it "does interpret Markdown when cook_method is 'email' and raw does not contain plaintext" do
      cooked = post_analyzer.cook('*this is italic*', cook_method: Post.cook_methods[:email])
      expect(cooked).to eq('<p><em>this is italic</em></p>')
    end

    it "does interpret Markdown when cook_method is 'regular'" do
      cooked = post_analyzer.cook('*this is italic*', cook_method: Post.cook_methods[:regular])
      expect(cooked).to eq('<p><em>this is italic</em></p>')
    end

    it "does interpret Markdown when no cook_method is set" do
      cooked = post_analyzer.cook('*this is italic*')
      expect(cooked).to eq('<p><em>this is italic</em></p>')
    end
  end

  # Examples for link extraction (#raw_links) and per-host tallies
  # (#linked_hosts), sharing the raw fixtures below.
  context "links" do
    let(:raw_no_links) { "hello world my name is evil trout" }
    let(:raw_one_link_md) { "[jlawr](http://www.imdb.com/name/nm2225369)" }
    let(:raw_two_links_html) { "<a href='http://disneyland.disney.go.com/'>disney</a> <a href='http://reddit.com'>reddit</a>" }
    let(:raw_three_links) { "http://discourse.org and http://discourse.org/another_url and http://www.imdb.com/name/nm2225369" }
    let(:raw_elided) { "<details class='elided'>\n<summary title='Show trimmed content'>&#183;&#183;&#183;</summary>\nhttp://discourse.org\n</details>" }

    describe "raw_links" do
      it "returns a blank collection for a post with no links" do
        post_analyzer = PostAnalyzer.new(raw_no_links, default_topic_id)
        expect(post_analyzer.raw_links).to be_blank
      end

      it "finds a link within markdown" do
        post_analyzer = PostAnalyzer.new(raw_one_link_md, default_topic_id)
        expect(post_analyzer.raw_links).to eq(["http://www.imdb.com/name/nm2225369"])
      end

      it "can find two links from html" do
        post_analyzer = PostAnalyzer.new(raw_two_links_html, default_topic_id)
        expect(post_analyzer.raw_links).to eq(["http://disneyland.disney.go.com/", "http://reddit.com"])
      end

      it "can find three links without markup" do
        post_analyzer = PostAnalyzer.new(raw_three_links, default_topic_id)
        expect(post_analyzer.raw_links).to eq(["http://discourse.org", "http://discourse.org/another_url", "http://www.imdb.com/name/nm2225369"])
      end

      it "doesn't extract links from elided part" do
        post_analyzer = PostAnalyzer.new(raw_elided, default_topic_id)
        # #cook is replaced with canned markup whose only link sits inside
        # the <details class='elided'> element.
        post_analyzer.expects(:cook).returns("<p><details class='elided'>\n<summary title='Show trimmed content'>&#183;&#183;&#183;</summary>\n<a href='http://discourse.org'>discourse.org</a>\n</details></p>")
        expect(post_analyzer.raw_links).to be_blank
      end
    end

    describe "linked_hosts" do
      it "returns blank with no links" do
        post_analyzer = PostAnalyzer.new(raw_no_links, default_topic_id)
        expect(post_analyzer.linked_hosts).to be_blank
      end

      it "returns the host and a count for links" do
        post_analyzer = PostAnalyzer.new(raw_two_links_html, default_topic_id)
        expect(post_analyzer.linked_hosts).to eq("disneyland.disney.go.com" => 1, "reddit.com" => 1)
      end

      it "counts properly with more than one link on the same host" do
        post_analyzer = PostAnalyzer.new(raw_three_links, default_topic_id)
        # Two of the three fixture links point at discourse.org; the
        # expectation is still 1 for that host.
        expect(post_analyzer.linked_hosts).to eq("discourse.org" => 1, "www.imdb.com" => 1)
      end
    end
  end

  # Examples for #embedded_media_count: which <img>, <video> and <audio>
  # elements are counted and which image classes are skipped.
  describe "embedded_media_count" do
    let(:raw_post_one_image_md) { "![sherlock](http://bbc.co.uk/sherlock.jpg)" }
    let(:raw_post_two_images_html) { "<img src='http://discourse.org/logo.png'> <img src='http://bbc.co.uk/sherlock.jpg'>" }
    let(:raw_post_with_avatars) { '<img alt="smiley" title=":smiley:" src="/assets/emoji/smiley.png" class="avatar"> <img alt="wink" title=":wink:" src="/assets/emoji/wink.png" class="avatar">' }
    let(:raw_post_with_favicon) { '<img src="/images/favicons/discourse.png" class="favicon">' }
    let(:raw_post_with_thumbnail) { '<img src="/assets/emoji/smiley.png" class="thumbnail">' }
    let(:raw_post_with_two_classy_images) { "<img src='http://discourse.org/logo.png' class='classy'> <img src='http://bbc.co.uk/sherlock.jpg' class='classy'>" }
    let(:raw_post_with_two_embedded_media) { '<video width="950" height="700" controls><source src="https://bbc.co.uk/news.mp4" type="video/mp4"></video><audio controls><source type="audio/mpeg" src="https://example.com/audio.mp3"></audio>' }

    it "returns 0 images for an empty post" do
      post_analyzer = PostAnalyzer.new("Hello world", nil)
      expect(post_analyzer.embedded_media_count).to eq(0)
    end

    it "finds images from markdown" do
      post_analyzer = PostAnalyzer.new(raw_post_one_image_md, default_topic_id)
      expect(post_analyzer.embedded_media_count).to eq(1)
    end

    it "finds images from HTML" do
      post_analyzer = PostAnalyzer.new(raw_post_two_images_html, default_topic_id)
      expect(post_analyzer.embedded_media_count).to eq(2)
    end

    it "finds video and audio from HTML" do
      post_analyzer = PostAnalyzer.new(raw_post_with_two_embedded_media, default_topic_id)
      expect(post_analyzer.embedded_media_count).to eq(2)
    end

    # In the four examples below PrettyText.cook is stubbed to return the
    # fixture unchanged, so the class attributes reach the analyzer as written.
    it "doesn't count avatars as images" do
      post_analyzer = PostAnalyzer.new(raw_post_with_avatars, default_topic_id)
      PrettyText.stubs(:cook).returns(raw_post_with_avatars)
      expect(post_analyzer.embedded_media_count).to eq(0)
    end

    it "doesn't count favicons as images" do
      post_analyzer = PostAnalyzer.new(raw_post_with_favicon, default_topic_id)
      PrettyText.stubs(:cook).returns(raw_post_with_favicon)
      expect(post_analyzer.embedded_media_count).to eq(0)
    end

    it "doesn't count thumbnails as images" do
      post_analyzer = PostAnalyzer.new(raw_post_with_thumbnail, default_topic_id)
      PrettyText.stubs(:cook).returns(raw_post_with_thumbnail)
      expect(post_analyzer.embedded_media_count).to eq(0)
    end

    it "doesn't count allowlisted images" do
      Post.stubs(:allowed_image_classes).returns(["classy"])
      PrettyText.stubs(:cook).returns(raw_post_with_two_classy_images)
      post_analyzer = PostAnalyzer.new(raw_post_with_two_classy_images, default_topic_id)
      expect(post_analyzer.embedded_media_count).to eq(0)
    end
  end

  # Examples for #link_count: mentions, heading anchors and hashtags are
  # expected to count as 0 links; explicit <a> / Markdown links are counted.
  describe "link_count" do
    let(:raw_post_one_link_md) { "[sherlock](http://www.bbc.co.uk/programmes/b018ttws)" }
    let(:raw_post_two_links_html) { "<a href='http://discourse.org'>discourse</a> <a href='http://twitter.com'>twitter</a>" }
    let(:raw_post_with_mentions) { "hello @novemberkilo how are you doing?" }
    let(:raw_post_with_anchors) { "# hello world" }
    # NOTE(review): the slug and tag name are interpolated without a leading
    # '#'; confirm against upstream whether "##{...}" was intended here.
    let(:raw_post_with_hashtags) { "a category #{Fabricate(:category).slug} and a tag #{Fabricate(:tag).name}" }

    it "returns 0 links for an empty post" do
      post_analyzer = PostAnalyzer.new("Hello world", nil)
      expect(post_analyzer.link_count).to eq(0)
    end

    it "returns 0 links for a post with mentions" do
      post_analyzer = PostAnalyzer.new(raw_post_with_mentions, default_topic_id)
      expect(post_analyzer.link_count).to eq(0)
    end

    it "returns 0 links for a post with anchors" do
      post_analyzer = PostAnalyzer.new(raw_post_with_anchors, default_topic_id)
      expect(post_analyzer.link_count).to eq(0)
    end

    # Previously shared its description with the mentions example above,
    # although it analyzes the hashtag fixture.
    it "returns 0 links for a post with hashtags" do
      SiteSetting.tagging_enabled = true
      post_analyzer = PostAnalyzer.new(raw_post_with_hashtags, default_topic_id)
      expect(post_analyzer.link_count).to eq(0)
    end

    it "returns links with href=''" do
      post_analyzer = PostAnalyzer.new('<a href="">Hello world</a>', nil)
      expect(post_analyzer.link_count).to eq(1)
    end

    it "finds links from markdown" do
      Oneboxer.stubs(:onebox)
      post_analyzer = PostAnalyzer.new(raw_post_one_link_md, default_topic_id)
      expect(post_analyzer.link_count).to eq(1)
    end

    it "finds links from HTML" do
      post_analyzer = PostAnalyzer.new(raw_post_two_links_html, default_topic_id)
      post_analyzer.cook(raw_post_two_links_html, {})
      expect(post_analyzer.found_oneboxes?).to be(false)
      expect(post_analyzer.link_count).to eq(2)
    end
  end

  # Examples for #raw_mentions: mentions are expected lowercased and unique,
  # and those inside pre/code/quotes/oneboxes or e-mail addresses are ignored.
  describe "raw_mentions" do
    it "returns an empty array with no matches" do
      post_analyzer = PostAnalyzer.new("Hello Jake and Finn!", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq([])
    end

    it "returns lowercase unique versions of the mentions" do
      post_analyzer = PostAnalyzer.new("@Jake @Finn @Jake", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['jake', 'finn'])
    end

    it "ignores pre" do
      # note, CommonMark has rules for dealing with HTML, if your paragraph starts with it
      # it will no longer be an "inline" so this means that @Finn in this case would not be a mention
      post_analyzer = PostAnalyzer.new(". <pre>@Jake</pre> @Finn", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['finn'])
    end

    it "catches content between pre tags" do
      post_analyzer = PostAnalyzer.new(". <pre>hello</pre> @Finn <pre></pre>", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['finn'])
    end

    it "ignores code" do
      post_analyzer = PostAnalyzer.new("@Jake `@Finn`", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['jake'])
    end

    it "ignores code in markdown-formatted code blocks" do
      # NOTE(review): the four leading spaces (a Markdown indented code block)
      # are reconstructed from this example's name and expectation; confirm
      # against upstream.
      post_analyzer = PostAnalyzer.new("    @Jake @Finn\n@Ryan", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['ryan'])
    end

    it "ignores quotes" do
      post_analyzer = PostAnalyzer.new("[quote=\"Evil Trout\"]\n@Jake\n[/quote]\n@Finn", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['finn'])
    end

    it "ignores group mentions in quotes" do
      Fabricate(:group, name: "team")
      Fabricate(:group, name: "mods")
      post_analyzer = PostAnalyzer.new("[quote=\"Evil Trout\"]\n@team\n[/quote]\n@mods", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(["mods"])
    end

    it "ignores oneboxes" do
      post_analyzer = PostAnalyzer.new("Hello @Jake\n#{url}", default_topic_id)
      # #cook is stubbed with markup in which "@Finn" appears only as the
      # text of the onebox anchor.
      post_analyzer.stubs(:cook).returns("<p>Hello <span class=\"mention\">@Jake</span><br><a href=\"https://twitter.com/evil_trout/status/345954894420787200\" class=\"onebox\" target=\"_blank\" rel=\"nofollow noopener\">@Finn</a></p>")
      expect(post_analyzer.raw_mentions).to eq(['jake'])
    end

    it "handles underscore in username" do
      post_analyzer = PostAnalyzer.new("@Jake @Finn @Jake_Old", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['jake', 'finn', 'jake_old'])
    end

    it "handles hyphen in groupname" do
      post_analyzer = PostAnalyzer.new("@org-board", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['org-board'])
    end

    it "ignores emails" do
      post_analyzer = PostAnalyzer.new("1@test.com 1@best.com @best @not", default_topic_id)
      expect(post_analyzer.raw_mentions).to eq(['best', 'not'])
    end
  end
end