FEATURE: add censored_pattern setting to censor posts using regex

This commit is contained in:
Neil Lalonde 2016-11-08 16:36:34 -05:00
parent 7d560ea3d5
commit 86522a52b7
10 changed files with 121 additions and 18 deletions

View File

@ -1,19 +1,38 @@
/**
 * Replaces censored words and censored-pattern matches in `text` with a run
 * of '■' characters of the same length as the matched text.
 *
 * @param {string} text - The raw text to censor.
 * @param {string} [censoredWords] - Pipe-delimited list of literal words
 *   (for example "shucks|whiz"). Each word is escaped, so regex
 *   metacharacters inside a word are matched literally.
 * @param {string} [censoredPattern] - A regular expression source string.
 *   It is ignored when it does not compile on its own.
 * @returns {string} The censored text, or the untouched original text when
 *   building or running the combined expression throws.
 */
export function censor(text, censoredWords, censoredPattern) {
  const originalText = text;
  // Escape every regex metacharacter so the string is matched literally.
  const escapeRegExp = s => s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
  let patterns = [];

  if (censoredWords && censoredWords.length) {
    patterns = censoredWords.split("|")
      // An empty entry ("a||b" or a trailing "|") would become an empty
      // alternative that matches the empty string and stops the loop below
      // before anything is censored.
      .filter(t => t.length > 0)
      .map(t => "(" + escapeRegExp(t) + ")");
  }

  if (censoredPattern && censoredPattern.length > 0) {
    try {
      new RegExp(censoredPattern); // throws if the pattern is invalid
      patterns.push("(" + censoredPattern + ")");
    } catch(e) {
      // Deliberately best-effort: an invalid pattern is skipped so that the
      // censored words are still applied.
    }
  }

  if (!patterns.length) { return text; }

  try {
    // Matches must sit on word boundaries. The trailing negative lookahead
    // rejects a match that is followed by a ")" with no "(" in between,
    // which leaves the URL part of a markdown link such as
    // [whiz](http://www.whiz.com) intact.
    const censorRegexp = new RegExp("(\\b(?:" + patterns.join("|") + ")\\b)(?![^\\(]*\\))", "ig");
    let m = censorRegexp.exec(text);
    // A zero-length match ends the loop; this also prevents looping forever
    // on patterns that can match the empty string.
    while (m && m[0]) {
      const replacement = new Array(m[0].length + 1).join('■');
      // m[0] is matched text, not a pattern: it must be escaped before it is
      // embedded in a new expression, otherwise a match such as "a(b" throws
      // and every replacement made so far is thrown away.
      text = text.replace(new RegExp("(\\b" + escapeRegExp(m[0]) + "\\b)(?![^\\(]*\\))", "ig"), replacement);
      // The replacement has the same length as the match, so the global
      // regex's lastIndex stays aligned with the modified text.
      m = censorRegexp.exec(text);
    }
  } catch(e) {
    return originalText;
  }
  return text;
}

View File

@ -4,10 +4,12 @@ import { registerOption } from 'pretty-text/pretty-text';
// Enable the "censored" feature and copy the two censoring site settings
// (word list and regex pattern) onto the pretty-text options object.
registerOption(function (siteSettings, opts) {
  const features = opts.features;
  features.censored = true;

  opts.censoredWords = siteSettings.censored_words;
  opts.censoredPattern = siteSettings.censored_pattern;
});
/**
 * Registers a pre-processor that censors text before it is cooked.
 *
 * The pre-processor reads the options from `helper.getOptions()` each time it
 * runs and passes both the censored word list and the censored regex pattern
 * to `censor`.
 *
 * @param {Object} helper - Object exposing `addPreProcessor(fn)` and
 *   `getOptions()`.
 */
export function setup(helper) {
  helper.addPreProcessor(text => {
    const options = helper.getOptions();
    return censor(text, options.censoredWords, options.censoredPattern);
  });
}

View File

@ -828,6 +828,7 @@ en:
site_settings:
censored_words: "Words that will be automatically replaced with ■■■■"
censored_pattern: "Regex pattern that will be automatically replaced with ■■■■"
delete_old_hidden_posts: "Auto-delete any hidden posts that stay hidden for more than 30 days."
default_locale: "The default language of this Discourse instance (ISO 639-1 Code)"
allow_user_locale: "Allow users to choose their own language interface preference"
@ -1442,6 +1443,7 @@ en:
reply_by_email_address_is_empty: "You must set a 'reply by email address' before enabling reply by email."
email_polling_disabled: "You must enable either manual or POP3 polling before enabling reply by email."
user_locale_not_enabled: "You must first enable 'allow user locale' before enabling this setting."
invalid_regex: "Regex is invalid or not allowed."
search:
within_post: "#%{post_number} by %{username}"

View File

@ -536,6 +536,11 @@ posting:
default: ''
refresh: true
type: list
censored_pattern:
client: true
default: ''
refresh: true
type: regex
enable_emoji:
default: true
client: true

View File

@ -212,7 +212,18 @@ module PrettyText
options[:topicId] = opts[:topic_id]
working_text = text.dup
sanitized = markdown(working_text, options)
begin
sanitized = markdown(working_text, options)
rescue MiniRacer::ScriptTerminatedError => e
if SiteSetting.censored_pattern.present?
Rails.logger.warn "Post cooking timed out. Clearing the censored_pattern setting and retrying."
SiteSetting.censored_pattern = nil
sanitized = markdown(working_text, options)
else
raise e
end
end
doc = Nokogiri::HTML.fragment(sanitized)

View File

@ -32,7 +32,8 @@ module SiteSettingExtension
url_list: 9,
host_list: 10,
category_list: 11,
value_list: 12)
value_list: 12,
regex: 13)
end
def mutex
@ -443,7 +444,8 @@ module SiteSettingExtension
types[:fixnum] => IntegerSettingValidator,
types[:string] => StringSettingValidator,
'list' => StringSettingValidator,
'enum' => StringSettingValidator
'enum' => StringSettingValidator,
'regex' => RegexSettingValidator
}
@validator_mapping[type_name]
end

View File

@ -0,0 +1,25 @@
# Validator for site settings of type "regex".
#
# A value is accepted when it is blank, or when it compiles as a Ruby Regexp
# and its first match against a fixed sample sentence does not cover
# (almost) the whole sentence.
class RegexSettingValidator

  # Sample text used to detect patterns that match nearly everything.
  LOREM = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam eget sem non elit tincidunt rhoncus.'.freeze

  def initialize(opts={})
    @opts = opts
  end

  # Blank values are always valid; anything else must pass valid_regex?.
  def valid_value?(val)
    return true unless val.present?
    valid_regex?(val)
  end

  # Returns true when val compiles and its first match in LOREM (if any) is
  # shorter than the sample minus 10 characters. Any StandardError raised
  # along the way (for example an invalid pattern) yields false.
  def valid_regex?(val)
    first_match = Regexp.new(val).match(LOREM)
    return true if first_match.nil?

    longest_allowed = LOREM.length - 10
    first_match[0].length < longest_allowed
  rescue StandardError
    false
  end

  def error_message
    I18n.t('site_settings.errors.invalid_regex')
  end

end

View File

@ -438,4 +438,13 @@ HTML
end
end
# Covers the recovery path in which a censored_pattern value makes cooking
# time out and the setting is cleared as a result.
describe "censored_pattern site setting" do
it "can be cleared if it causes cooking to timeout" do
SiteSetting.censored_pattern = "evilregex"
# The stub makes every call to markdown raise, so a retry after the setting
# is cleared raises as well (presumably described_class is PrettyText --
# confirm against the top of this spec file).
described_class.stubs(:markdown).raises(MiniRacer::ScriptTerminatedError)
# `rescue nil` swallows that error; only the final state of the setting is
# asserted below.
PrettyText.cook("Protect against it plz.") rescue nil
expect(SiteSetting.censored_pattern).to be_blank
end
end
end

View File

@ -0,0 +1,24 @@
require 'rails_helper'
# Specs for RegexSettingValidator#valid_value?: blank values, patterns that
# do not compile, patterns that match too much, and an ordinary safe pattern.
describe RegexSettingValidator do
  describe '#valid_value?' do
    let(:validator) { described_class.new }

    it "returns true for blank values" do
      ['', nil].each do |blank_value|
        expect(validator.valid_value?(blank_value)).to eq(true)
      end
    end

    it "return false for invalid regex" do
      result = validator.valid_value?('(()')
      expect(result).to eq(false)
    end

    it "returns false for regex with dangerous matches" do
      result = validator.valid_value?('(.)*')
      expect(result).to eq(false)
    end

    it "returns true for safe regex" do
      result = validator.valid_value?('\d{3}-\d{4}')
      expect(result).to eq(true)
    end
  end
end

View File

@ -11,7 +11,8 @@ const defaultOpts = buildOptions({
emoji_set: 'emoji_one',
highlighted_languages: 'json|ruby|javascript',
default_code_lang: 'auto',
censored_words: 'shucks|whiz|whizzer'
censored_words: 'shucks|whiz|whizzer',
censored_pattern: '\\d{3}-\\d{4}|tech\\w*'
},
getURL: url => url
});
@ -532,6 +533,9 @@ test("censoring", function() {
cooked("The link still works. [whiz](http://www.whiz.com)",
"<p>The link still works. <a href=\"http://www.whiz.com\">&#9632;&#9632;&#9632;&#9632;</a></p>",
"it won't break links by censoring them.");
cooked("Call techapj the computer whiz at 555-555-1234 for free help.",
"<p>Call &#9632;&#9632;&#9632;&#9632;&#9632;&#9632;&#9632; the computer &#9632;&#9632;&#9632;&#9632; at 555-&#9632;&#9632;&#9632;&#9632;&#9632;&#9632;&#9632;&#9632; for free help.</p>",
"uses both censored words and patterns from site settings");
});
test("code blocks/spans hoisting", function() {