diff --git a/app/models/post_analyzer.rb b/app/models/post_analyzer.rb index f5ff4117b1c..09f7cd9932f 100644 --- a/app/models/post_analyzer.rb +++ b/app/models/post_analyzer.rb @@ -19,7 +19,7 @@ class PostAnalyzer result = Oneboxer.apply(cooked, topic_id: @topic_id) do |url, _| @found_oneboxes = true Oneboxer.invalidate(url) if args.last[:invalidate_oneboxes] - Oneboxer.cached_onebox url + Oneboxer.cached_onebox(url) end cooked = result.to_html if result.changed? @@ -30,10 +30,9 @@ class PostAnalyzer def image_count return 0 unless @raw.present? - cooked_document.search("img").reject do |t| - dom_class = t["class"] - if dom_class - (Post.white_listed_image_classes & dom_class.split(" ")).count > 0 + cooked_stripped.css("img").reject do |t| + if dom_class = t["class"] + (Post.white_listed_image_classes & dom_class.split).count > 0 end end.count end @@ -42,8 +41,8 @@ class PostAnalyzer def attachment_count return 0 unless @raw.present? - attachments = cooked_document.css("a.attachment[href^=\"#{Discourse.store.absolute_base_url}\"]") - attachments += cooked_document.css("a.attachment[href^=\"#{Discourse.store.relative_base_url}\"]") if Discourse.store.internal? + attachments = cooked_stripped.css("a.attachment[href^=\"#{Discourse.store.absolute_base_url}\"]") + attachments += cooked_stripped.css("a.attachment[href^=\"#{Discourse.store.relative_base_url}\"]") if Discourse.store.internal? attachments.count end @@ -51,13 +50,6 @@ class PostAnalyzer return [] if @raw.blank? return @raw_mentions if @raw_mentions.present? - # strip quotes, code blocks and oneboxes - cooked_stripped = cooked_document - cooked_stripped.css("aside.quote").remove - cooked_stripped.css("pre").remove - cooked_stripped.css("code").remove - cooked_stripped.css(".onebox").remove - raw_mentions = cooked_stripped.css('.mention, .mention-group').map do |e| if name = e.inner_text name = name[1..-1] @@ -105,11 +97,10 @@ class PostAnalyzer @raw_links = [] - cooked_document.search("a").each do |l| + cooked_stripped.css("a[href]").each do |l| # Don't include @mentions in the link count - next if l.attributes['href'].nil? || link_is_a_mention?(l) - url = l.attributes['href'].to_s - @raw_links << url + next if l['href'].blank? || link_is_a_mention?(l) + @raw_links << l['href'].to_s end @raw_links @@ -122,13 +113,18 @@ class PostAnalyzer private - def cooked_document - @cooked_document ||= Nokogiri::HTML.fragment(cook(@raw, topic_id: @topic_id)) - end + def cooked_stripped + @cooked_stripped ||= begin + doc = Nokogiri::HTML.fragment(cook(@raw, topic_id: @topic_id)) + doc.css("pre, code, aside.quote, .onebox, .elided").remove + doc + end + end + + def link_is_a_mention?(l) + html_class = l['class'] + return false if html_class.blank? + html_class.to_s['mention'] && l['href'].to_s[/^\/users\//] + end - def link_is_a_mention?(l) - html_class = l.attributes['class'] - return false if html_class.nil? - html_class.to_s == 'mention' && l.attributes['href'].to_s =~ /^\/users\// - end end diff --git a/spec/models/post_analyzer_spec.rb b/spec/models/post_analyzer_spec.rb index 91cefd382b2..ecda5cc6a0c 100644 --- a/spec/models/post_analyzer_spec.rb +++ b/spec/models/post_analyzer_spec.rb @@ -38,8 +38,9 @@ describe PostAnalyzer do context "links" do let(:raw_no_links) { "hello world my name is evil trout" } let(:raw_one_link_md) { "[jlawr](http://www.imdb.com/name/nm2225369)" } - let(:raw_two_links_html) { "disney reddit"} - let(:raw_three_links) { "http://discourse.org and http://discourse.org/another_url and http://www.imdb.com/name/nm2225369"} + let(:raw_two_links_html) { "disney reddit" } + let(:raw_three_links) { "http://discourse.org and http://discourse.org/another_url and http://www.imdb.com/name/nm2225369" } + let(:raw_elided) { "
\n···\nhttp://discourse.org\n
" } describe "raw_links" do it "returns a blank collection for a post with no links" do @@ -61,6 +62,12 @@ describe PostAnalyzer do post_analyzer = PostAnalyzer.new(raw_three_links, default_topic_id) expect(post_analyzer.raw_links).to eq(["http://discourse.org", "http://discourse.org/another_url", "http://www.imdb.com/name/nm2225369"]) end + + it "doesn't extract links from elided part" do + post_analyzer = PostAnalyzer.new(raw_elided, default_topic_id) + post_analyzer.expects(:cook).returns("

\n···\ndiscourse.org\n

") + expect(post_analyzer.raw_links).to be_blank + end end describe "linked_hosts" do