FIX: wasn't extracting links to quoted posts

This commit is contained in:
Régis Hanol 2017-02-06 14:45:04 +01:00
parent ceee2a509a
commit ba115480ba
8 changed files with 57 additions and 46 deletions

View File

@ -206,7 +206,7 @@ GEM
omniauth-twitter (1.3.0) omniauth-twitter (1.3.0)
omniauth-oauth (~> 1.1) omniauth-oauth (~> 1.1)
rack rack
onebox (1.7.8) onebox (1.7.9)
fast_blank (>= 1.0.0) fast_blank (>= 1.0.0)
htmlentities (~> 4.3.4) htmlentities (~> 4.3.4)
moneta (~> 0.8) moneta (~> 0.8)
@ -476,4 +476,4 @@ DEPENDENCIES
unicorn unicorn
BUNDLED WITH BUNDLED WITH
1.14.2 1.14.3

View File

@ -0,0 +1,9 @@
class Sanitize
module Config
DISCOURSE_ONEBOX ||= freeze_config merge(ONEBOX,
attributes: merge(ONEBOX[:attributes], 'aside' => [:data])
)
end
end

View File

@ -69,7 +69,7 @@ module Onebox
first_post = topic.ordered_posts.first first_post = topic.ordered_posts.first
args = { args = {
topic: topic.id, topic_id: topic.id,
avatar: PrettyText.avatar_img(topic.user.avatar_template, "tiny"), avatar: PrettyText.avatar_img(topic.user.avatar_template, "tiny"),
original_url: @url, original_url: @url,
title: PrettyText.unescape_emoji(CGI::escapeHTML(topic.title)), title: PrettyText.unescape_emoji(CGI::escapeHTML(topic.title)),

View File

@ -1,4 +1,4 @@
<aside class='quote topic-onebox' data-post="1" data-topic="{{topic}}"> <aside class='quote' data-post="1" data-topic="{{topic_id}}">
<div class='title'> <div class='title'>
{{{avatar}}} {{{avatar}}}
<a href="{{original_url}}">{{{title}}}</a> {{{category_html}}} <a href="{{original_url}}">{{{title}}}</a> {{{category_html}}}

View File

@ -1,3 +1,4 @@
require_dependency "./lib/onebox/discourse_onebox_sanitize_config"
Dir["#{Rails.root}/lib/onebox/engine/*_onebox.rb"].sort.each { |f| require f } Dir["#{Rails.root}/lib/onebox/engine/*_onebox.rb"].sort.each { |f| require f }
module Oneboxer module Oneboxer
@ -142,8 +143,8 @@ module Oneboxer
Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
uri = URI(url) rescue nil uri = URI(url) rescue nil
return blank_onebox if uri.blank? || SiteSetting.onebox_domains_blacklist.include?(uri.hostname) return blank_onebox if uri.blank? || SiteSetting.onebox_domains_blacklist.include?(uri.hostname)
options = { cache: {}, max_width: 695, sanitize_config: Sanitize::Config::DISCOURSE_ONEBOX }
r = Onebox.preview(url, cache: {}, max_width: 695) r = Onebox.preview(url, options)
{ onebox: r.to_s, preview: r.try(:placeholder_html).to_s } { onebox: r.to_s, preview: r.try(:placeholder_html).to_s }
end end
rescue => e rescue => e

View File

@ -270,44 +270,36 @@ module PrettyText
end end
end end
class DetectedLink class DetectedLink < Struct.new(:url, :is_quote); end
attr_accessor :is_quote, :url
def initialize(url, is_quote=false)
@url = url
@is_quote = is_quote
end
end
def self.extract_links(html) def self.extract_links(html)
links = [] links = []
doc = Nokogiri::HTML.fragment(html) doc = Nokogiri::HTML.fragment(html)
# remove href inside quotes & elided part # remove href inside quotes & elided part
doc.css("aside.quote:not(.topic-onebox) a, .elided a").each { |l| l["href"] = "" } doc.css("aside.quote a, .elided a").each { |a| a["href"] = "" }
# extract all links from the post # extract all links
doc.css("a").each { |l| doc.css("a").each do |a|
unless l["href"].blank? || "#".freeze == l["href"][0] if a["href"].present? && a["href"][0] != "#".freeze
links << DetectedLink.new(l["href"]) links << DetectedLink.new(a["href"], false)
end end
}
# extract links to quotes
doc.css("aside.quote[data-topic]").each do |a|
topic_id = a['data-topic']
url = "/t/topic/#{topic_id}"
if post_number = a['data-post']
url << "/#{post_number}"
end
links << DetectedLink.new(url, true)
end end
# Extract Youtube links # extract quotes
doc.css("div[data-youtube-id]").each do |d| doc.css("aside.quote[data-topic]").each do |aside|
links << DetectedLink.new("https://www.youtube.com/watch?v=#{d['data-youtube-id']}", false) if aside["data-topic"].present?
url = "/t/topic/#{aside["data-topic"]}"
url << "/#{aside["data-post"]}" if aside["data-post"].present?
links << DetectedLink.new(url, true)
end
end
# extract Youtube links
doc.css("div[data-youtube-id]").each do |div|
if div["data-youtube-id"].present?
links << DetectedLink.new("https://www.youtube.com/watch?v=#{div['data-youtube-id']}", false)
end
end end
links links

View File

@ -225,12 +225,11 @@ HTML
<a href='http://body_and_quote.com'>http://useless2.com</a> <a href='http://body_and_quote.com'>http://useless2.com</a>
") ")
expect(links.map{|l| [l.url, l.is_quote]}.to_a.sort).to eq( expect(links.map { |l| [l.url, l.is_quote] }.sort).to eq([
[["http://body_only.com",false], ["http://body_only.com", false],
["http://body_and_quote.com", false], ["http://body_and_quote.com", false],
["/t/topic/1234",true] ["/t/topic/1234", true],
].sort ].sort)
)
end end
it "should not preserve tags in code blocks" do it "should not preserve tags in code blocks" do

View File

@ -9,9 +9,7 @@ describe Jobs::ProcessPost do
context 'with a post' do context 'with a post' do
let(:post) do let(:post) { Fabricate(:post) }
Fabricate(:post)
end
it 'does not erase posts when CookedPostProcessor malfunctions' do it 'does not erase posts when CookedPostProcessor malfunctions' do
# Look kids, an actual reason why you want to use mocks # Look kids, an actual reason why you want to use mocks
@ -35,7 +33,6 @@ describe Jobs::ProcessPost do
end end
it 'processes posts' do it 'processes posts' do
post = Fabricate(:post, raw: "<img src='#{Discourse.base_url_no_prefix}/awesome/picture.png'>") post = Fabricate(:post, raw: "<img src='#{Discourse.base_url_no_prefix}/awesome/picture.png'>")
expect(post.cooked).to match(/http/) expect(post.cooked).to match(/http/)
@ -51,7 +48,20 @@ describe Jobs::ProcessPost do
expect { Jobs::ProcessPost.new.execute(post_id: post.id) }.to change { TopicLink.count }.by(1) expect { Jobs::ProcessPost.new.execute(post_id: post.id) }.to change { TopicLink.count }.by(1)
end end
it "extracts links to quoted posts" do
quoted_post = Fabricate(:post, raw: "This is a post with a link to https://www.discourse.org", post_number: 42)
post.update_columns(raw: "This quote is the best\n\n[quote=\"#{quoted_post.user.username}, topic:#{quoted_post.topic_id}, post:#{quoted_post.post_number}\"]#{quoted_post.excerpt}[/quote]")
# when creating a quote, we also create the reflexion link
expect { Jobs::ProcessPost.new.execute(post_id: post.id) }.to change { TopicLink.count }.by(2)
end
it "extracts links to oneboxed topics" do
oneboxed_post = Fabricate(:post)
post.update_columns(raw: "This post is the best\n\n#{oneboxed_post.full_url}")
# when creating a quote, we also create the reflexion link
expect { Jobs::ProcessPost.new.execute(post_id: post.id) }.to change { TopicLink.count }.by(2)
end
end end
end end