FEATURE: Censor Oneboxes (#12902)
Previously, onebox content was not run through the censor regex, so censored words could be sneaked in via a onebox.
This commit is contained in:
parent
58cb120aa2
commit
d184fe59ca
|
@ -1,6 +1,7 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class WordWatcher
|
||||
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
|
||||
|
||||
def initialize(raw)
|
||||
@raw = raw
|
||||
|
@ -70,6 +71,27 @@ class WordWatcher
|
|||
"watched-words-list:#{action}"
|
||||
end
|
||||
|
||||
# Replaces every match of the :censor watched-word pattern found in the text
# nodes of +html+ with a run of REPLACEMENT_LETTER of the same length.
#
# html - String of HTML markup to censor.
#
# Returns +html+ itself when there is no censor pattern; otherwise returns the
# re-serialized fragment with the matching text masked.
def self.censor(html)
  censor_regexp = WordWatcher.word_matcher_regexp(:censor)
  return html if censor_regexp.blank?

  fragment = Nokogiri::HTML5::fragment(html)
  fragment.traverse do |node|
    # Only text nodes are rewritten; other nodes are left untouched.
    next unless node.text?

    node.content = node.content.gsub(censor_regexp) do |matched|
      # The regex captures leading whitespace: keep that prefix as-is and
      # mask only the characters that follow it.
      leading = matched.size - matched.lstrip.size
      matched[0, leading] + REPLACEMENT_LETTER * (matched.size - leading)
    end
  end
  fragment.to_s
end
|
||||
|
||||
def self.clear_cache!
|
||||
WatchedWord.actions.each do |a, i|
|
||||
Discourse.cache.delete word_matcher_regexp_key(a)
|
||||
|
|
|
@ -455,7 +455,10 @@ module Oneboxer
|
|||
onebox_options[:user_agent] = user_agent_override if user_agent_override
|
||||
|
||||
r = Onebox.preview(uri.to_s, onebox_options)
|
||||
result = { onebox: r.to_s, preview: r&.placeholder_html.to_s }
|
||||
result = {
|
||||
onebox: WordWatcher.censor(r.to_s),
|
||||
preview: WordWatcher.censor(r&.placeholder_html.to_s)
|
||||
}
|
||||
|
||||
# NOTE: Call r.errors after calling placeholder_html
|
||||
if r.errors.any?
|
||||
|
|
|
@ -177,6 +177,29 @@ describe Oneboxer do
|
|||
expect(Oneboxer.external_onebox(url)[:onebox]).to be_present
|
||||
end
|
||||
|
||||
it "censors external oneboxes" do
  # Register "bad word" as a watched word with the censor action.
  Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "bad word")

  url = 'https://example.com/'
  # The stubbed page carries the censored phrase in its OpenGraph title,
  # its OpenGraph description and its body text.
  stub_request(:any, url).to_return(status: 200, body: <<~HTML, headers: {})
    <html>
    <head>
    <meta property="og:title" content="title with bad word">
    <meta property="og:description" content="description with bad word">
    </head>
    <body>
    <p>content with bad word</p>
    </body>
    </html>
  HTML

  onebox = Oneboxer.external_onebox(url)
  # The harmless part of the title must survive while the censored phrase
  # is absent, both in the rendered onebox and in its preview.
  expect(onebox[:onebox]).to include('title with')
  expect(onebox[:onebox]).not_to include('bad word')
  expect(onebox[:preview]).to include('title with')
  expect(onebox[:preview]).not_to include('bad word')
end
|
||||
|
||||
it "uses the Onebox custom user agent on specified hosts" do
|
||||
SiteSetting.force_custom_user_agent_hosts = "http://codepen.io|https://video.discourse.org/"
|
||||
url = 'https://video.discourse.org/presentation.mp4'
|
||||
|
|
Loading…
Reference in New Issue