FIX: Improve top links section from user summary (#15675)

* Do not extract links for hotlinked images
* Include only links that have been clicked at least once in the user summary
This commit is contained in:
Bianca Nenciu 2022-01-24 02:33:23 +02:00 committed by GitHub
parent cd68279f5c
commit 48e5d1af03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 25 additions and 0 deletions

View File

@@ -40,6 +40,7 @@ class UserSummary
.merge(Topic.listable_topics.visible.secured(@guardian)) .merge(Topic.listable_topics.visible.secured(@guardian))
.where(user: @user) .where(user: @user)
.where(internal: false, reflection: false, quote: false) .where(internal: false, reflection: false, quote: false)
.where('clicks > 0')
.order('clicks DESC, topic_links.created_at DESC') .order('clicks DESC, topic_links.created_at DESC')
.limit(MAX_SUMMARY_RESULTS) .limit(MAX_SUMMARY_RESULTS)
end end

View File

@@ -371,6 +371,9 @@ module PrettyText
# remove href inside quotes & oneboxes & elided part # remove href inside quotes & oneboxes & elided part
doc.css("aside.quote a, aside.onebox a, .elided a").remove doc.css("aside.quote a, aside.onebox a, .elided a").remove
# remove hotlinked images
doc.css("a.onebox > img").each { |img| img.parent.remove }
# extract all links # extract all links
doc.css("a").each do |a| doc.css("a").each do |a|
if a["href"].present? && a["href"][0] != "#" if a["href"].present? && a["href"][0] != "#"

View File

@@ -836,6 +836,20 @@ describe PrettyText do
expect(extract_urls("<aside class=\"quote\" data-topic=\"321\">aside</aside>")).to eq(["/t/321"]) expect(extract_urls("<aside class=\"quote\" data-topic=\"321\">aside</aside>")).to eq(["/t/321"])
end end
# The markup holds two anchors: a plain text link, and an anchor with class
# "onebox" whose only child is an <img>. Only the plain link's URL is expected
# back from extract_urls.
it "does not extract links from hotlinked images" do
  extracted = extract_urls(<<~HTML)
    <p>
    <a href="https://example.com">example</a>
    <a href="https://images.pexels.com/photos/1525041/pexels-photo-1525041.jpeg?auto=compress&amp;cs=tinysrgb&amp;w=1260&amp;h=750&amp;dpr=2" target="_blank" rel="noopener" class="onebox">
    <img src="https://images.pexels.com/photos/1525041/pexels-photo-1525041.jpeg?auto=compress&amp;cs=tinysrgb&amp;w=1260&amp;h=750&amp;dpr=2" width="690" height="459">
    </a>
    </p>
  HTML

  expect(extracted).to eq(["https://example.com"])
end
it "should lazyYT videos" do it "should lazyYT videos" do
expect(extract_urls("<div class=\"lazyYT\" data-youtube-id=\"yXEuEUQIP3Q\" data-youtube-title=\"Mister Rogers defending PBS to the US Senate\" data-width=\"480\" data-height=\"270\" data-parameters=\"feature=oembed&amp;wmode=opaque\"></div>")).to eq(["https://www.youtube.com/watch?v=yXEuEUQIP3Q"]) expect(extract_urls("<div class=\"lazyYT\" data-youtube-id=\"yXEuEUQIP3Q\" data-youtube-title=\"Mister Rogers defending PBS to the US Senate\" data-width=\"480\" data-height=\"270\" data-parameters=\"feature=oembed&amp;wmode=opaque\"></div>")).to eq(["https://www.youtube.com/watch?v=yXEuEUQIP3Q"])
end end

View File

@@ -93,4 +93,11 @@ describe UserSummary do
expect(summary.top_categories.first[:topic_count]).to eq(1) expect(summary.top_categories.first[:topic_count]).to eq(1)
expect(summary.top_categories.first[:post_count]).to eq(1) expect(summary.top_categories.first[:post_count]).to eq(1)
end end
# A link that has never been clicked must not show up in the summary's links.
# (The description previously said "summaries", but the assertion is on
# summary.links.)
it "does not include links with no clicks" do
  # Build a post containing one external link and extract its topic links.
  # No click is recorded for that link anywhere in this example.
  post = Fabricate(:post, raw: "[example](https://example.com)")
  TopicLink.extract_from(post)

  summary = UserSummary.new(post.user, Guardian.new)

  expect(summary.links.length).to eq(0)
end
end end