FEATURE: Censor Oneboxes (#12902)

Previously, onebox content was not run through the censor regex, meaning censored words could be sneaked in via a onebox.
This commit is contained in:
Bianca Nenciu 2021-06-03 04:39:12 +03:00 committed by GitHub
parent 58cb120aa2
commit d184fe59ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 49 additions and 1 deletions

View File

@ -1,6 +1,7 @@
# frozen_string_literal: true
class WordWatcher
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
def initialize(raw)
@raw = raw
@ -70,6 +71,27 @@ class WordWatcher
"watched-words-list:#{action}"
end
# Masks censored watched words inside the text nodes of an HTML fragment.
#
# Each match of the :censor word matcher regexp is replaced by
# REPLACEMENT_LETTER repeated once per matched (non-padding) character.
# Only text nodes are rewritten; other node types are left as parsed.
#
# @param html [String] HTML fragment to censor
# @return [String] +html+ itself when no censor regexp is available,
#   otherwise the re-serialized fragment with matches masked
def self.censor(html)
  censor_regexp = WordWatcher.word_matcher_regexp(:censor)
  return html if censor_regexp.blank?

  fragment = Nokogiri::HTML5.fragment(html)
  fragment.traverse do |node|
    next unless node.text?

    node.content = node.content.gsub(censor_regexp) do |hit|
      # The regexp captures leading whitespace along with the word: keep
      # that padding verbatim and mask only what follows it.
      word = hit.lstrip
      hit[0, hit.size - word.size] + REPLACEMENT_LETTER * word.size
    end
  end

  fragment.to_s
end
def self.clear_cache!
WatchedWord.actions.each do |a, i|
Discourse.cache.delete word_matcher_regexp_key(a)

View File

@ -455,7 +455,10 @@ module Oneboxer
onebox_options[:user_agent] = user_agent_override if user_agent_override
r = Onebox.preview(uri.to_s, onebox_options)
result = { onebox: r.to_s, preview: r&.placeholder_html.to_s }
result = {
onebox: WordWatcher.censor(r.to_s),
preview: WordWatcher.censor(r&.placeholder_html.to_s)
}
# NOTE: Call r.errors after calling placeholder_html
if r.errors.any?

View File

@ -177,6 +177,29 @@ describe Oneboxer do
expect(Oneboxer.external_onebox(url)[:onebox]).to be_present
end
# Content fetched for an external onebox (and its placeholder preview) must
# have censored watched words masked before it is returned.
it "censors external oneboxes" do
  Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "bad word")

  url = 'https://example.com/'
  # The watched word appears in the title, the description and the body of
  # the stubbed page.
  stub_request(:any, url).to_return(status: 200, body: <<~HTML, headers: {})
    <html>
    <head>
    <meta property="og:title" content="title with bad word">
    <meta property="og:description" content="description with bad word">
    </head>
    <body>
    <p>content with bad word</p>
    </body>
    </html>
  HTML

  onebox = Oneboxer.external_onebox(url)

  # The harmless part of the title survives; the watched word does not.
  expect(onebox[:onebox]).to include('title with')
  expect(onebox[:onebox]).not_to include('bad word')
  expect(onebox[:preview]).to include('title with')
  expect(onebox[:preview]).not_to include('bad word')
end
it "uses the Onebox custom user agent on specified hosts" do
SiteSetting.force_custom_user_agent_hosts = "http://codepen.io|https://video.discourse.org/"
url = 'https://video.discourse.org/presentation.mp4'