From 48e5d1af0329ae3033f587f7a76f2485bf20434f Mon Sep 17 00:00:00 2001 From: Bianca Nenciu Date: Mon, 24 Jan 2022 02:33:23 +0200 Subject: [PATCH] FIX: Improve top links section from user summary (#15675) * Do not extract links for hotlinked images * Include only links that have been clicked at least once in user summary --- app/models/user_summary.rb | 1 + lib/pretty_text.rb | 3 +++ spec/components/pretty_text_spec.rb | 14 ++++++++++++++ spec/models/user_summary_spec.rb | 7 +++++++ 4 files changed, 25 insertions(+) diff --git a/app/models/user_summary.rb b/app/models/user_summary.rb index f30507117d0..4b53dcfcaff 100644 --- a/app/models/user_summary.rb +++ b/app/models/user_summary.rb @@ -40,6 +40,7 @@ class UserSummary .merge(Topic.listable_topics.visible.secured(@guardian)) .where(user: @user) .where(internal: false, reflection: false, quote: false) + .where('clicks > 0') .order('clicks DESC, topic_links.created_at DESC') .limit(MAX_SUMMARY_RESULTS) end diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index 9f08e803001..a5885ce22d6 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -371,6 +371,9 @@ module PrettyText # remove href inside quotes & oneboxes & elided part doc.css("aside.quote a, aside.onebox a, .elided a").remove + # remove hotlinked images + doc.css("a.onebox > img").each { |img| img.parent.remove } + # extract all links doc.css("a").each do |a| if a["href"].present? && a["href"][0] != "#" diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index dcd3bd849ef..8e180c5eb51 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -836,6 +836,20 @@ describe PrettyText do expect(extract_urls("")).to eq(["/t/321"]) end + it "does not extract links from hotlinked images" do + html = <<~HTML +

+ example + + + + +

+ HTML + + expect(extract_urls(html)).to eq(["https://example.com"]) + end + it "should lazyYT videos" do expect(extract_urls("
")).to eq(["https://www.youtube.com/watch?v=yXEuEUQIP3Q"]) end diff --git a/spec/models/user_summary_spec.rb b/spec/models/user_summary_spec.rb index 4126632075f..a0e6840efa9 100644 --- a/spec/models/user_summary_spec.rb +++ b/spec/models/user_summary_spec.rb @@ -93,4 +93,11 @@ describe UserSummary do expect(summary.top_categories.first[:topic_count]).to eq(1) expect(summary.top_categories.first[:post_count]).to eq(1) end + + it "does not include summaries with no clicks" do + post = Fabricate(:post, raw: "[example](https://example.com)") + TopicLink.extract_from(post) + summary = UserSummary.new(post.user, Guardian.new) + expect(summary.links.length).to eq(0) + end end