FEATURE: add censored_pattern setting to censor posts using regex
This commit is contained in:
parent
7d560ea3d5
commit
86522a52b7
|
@ -1,19 +1,38 @@
|
|||
/**
 * Replaces censored words and pattern matches in `text` with "■" characters,
 * one "■" per censored character so the text length is preserved.
 *
 * @param {string} text             the raw text to censor
 * @param {string} censoredWords    "|"-separated list of literal words
 * @param {string} censoredPattern  optional regular expression source
 * @returns {string} the censored text, or the original text if the combined
 *                   expression cannot be built or applied
 */
export function censor(text, censoredWords, censoredPattern) {
  const patterns = [];

  if (censoredWords && censoredWords.length) {
    censoredWords.split("|").forEach(word => {
      // Skip blank entries (e.g. "a||b" or a trailing "|"): an empty
      // alternative matches the empty string first and would stop every
      // other word from being censored.
      if (word.length) {
        // Words are literals, so escape every regex metacharacter.
        patterns.push("(" + word.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&') + ")");
      }
    });
  }

  if (censoredPattern && censoredPattern.length > 0) {
    try {
      new RegExp(censoredPattern); // throws if the pattern is invalid
      patterns.push("(" + censoredPattern + ")");
    } catch (e) {
      // An invalid pattern is ignored; the word list still applies.
    }
  }

  if (!patterns.length) { return text; }

  try {
    // Whole-word matches only; the trailing negative lookahead skips matches
    // that are followed by a ")" with no "(" in between (e.g. a link target).
    const censorRegexp = new RegExp("(\\b(?:" + patterns.join("|") + ")\\b)(?![^\\(]*\\))", "ig");

    // Replace each match directly instead of rebuilding a second expression
    // from the matched text, which would need escaping and could over-match.
    return text.replace(censorRegexp, match => new Array(match.length + 1).join('■'));
  } catch (e) {
    return text;
  }
}
|
||||
|
|
|
@ -4,10 +4,12 @@ import { registerOption } from 'pretty-text/pretty-text';
|
|||
// Enable the censoring feature and copy both censoring site settings
// (word list and regex pattern) onto the pretty-text options.
registerOption((settings, options) => {
  const { censored_words, censored_pattern } = settings;

  options.features.censored = true;
  options.censoredWords = censored_words;
  options.censoredPattern = censored_pattern;
});
|
||||
|
||||
/**
 * Registers a pre-processor that censors the raw text using both the
 * censored word list and the censored pattern taken from the helper's options.
 *
 * @param helper  the helper object passed in by the caller; only
 *                `addPreProcessor` and `getOptions` are used here
 */
export function setup(helper) {
  helper.addPreProcessor(text => {
    // Read the options once and forward both settings; returning early with
    // only the word list would leave the pattern unused.
    const options = helper.getOptions();
    return censor(text, options.censoredWords, options.censoredPattern);
  });
}
|
||||
|
|
|
@ -828,6 +828,7 @@ en:
|
|||
|
||||
site_settings:
|
||||
censored_words: "Words that will be automatically replaced with ■■■■"
|
||||
censored_pattern: "Regex pattern that will be automatically replaced with ■■■■"
|
||||
delete_old_hidden_posts: "Auto-delete any hidden posts that stay hidden for more than 30 days."
|
||||
default_locale: "The default language of this Discourse instance (ISO 639-1 Code)"
|
||||
allow_user_locale: "Allow users to choose their own language interface preference"
|
||||
|
@ -1442,6 +1443,7 @@ en:
|
|||
reply_by_email_address_is_empty: "You must set a 'reply by email address' before enabling reply by email."
|
||||
email_polling_disabled: "You must enable either manual or POP3 polling before enabling reply by email."
|
||||
user_locale_not_enabled: "You must first enable 'allow user locale' before enabling this setting."
|
||||
invalid_regex: "Regex is invalid or not allowed."
|
||||
|
||||
search:
|
||||
within_post: "#%{post_number} by %{username}"
|
||||
|
|
|
@ -536,6 +536,11 @@ posting:
|
|||
default: ''
|
||||
refresh: true
|
||||
type: list
|
||||
censored_pattern:
|
||||
client: true
|
||||
default: ''
|
||||
refresh: true
|
||||
type: regex
|
||||
enable_emoji:
|
||||
default: true
|
||||
client: true
|
||||
|
|
|
@ -212,7 +212,18 @@ module PrettyText
|
|||
options[:topicId] = opts[:topic_id]
|
||||
|
||||
working_text = text.dup
|
||||
sanitized = markdown(working_text, options)
|
||||
|
||||
begin
|
||||
sanitized = markdown(working_text, options)
|
||||
rescue MiniRacer::ScriptTerminatedError => e
|
||||
if SiteSetting.censored_pattern.present?
|
||||
Rails.logger.warn "Post cooking timed out. Clearing the censored_pattern setting and retrying."
|
||||
SiteSetting.censored_pattern = nil
|
||||
sanitized = markdown(working_text, options)
|
||||
else
|
||||
raise e
|
||||
end
|
||||
end
|
||||
|
||||
doc = Nokogiri::HTML.fragment(sanitized)
|
||||
|
||||
|
|
|
@ -32,7 +32,8 @@ module SiteSettingExtension
|
|||
url_list: 9,
|
||||
host_list: 10,
|
||||
category_list: 11,
|
||||
value_list: 12)
|
||||
value_list: 12,
|
||||
regex: 13)
|
||||
end
|
||||
|
||||
def mutex
|
||||
|
@ -443,7 +444,8 @@ module SiteSettingExtension
|
|||
types[:fixnum] => IntegerSettingValidator,
|
||||
types[:string] => StringSettingValidator,
|
||||
'list' => StringSettingValidator,
|
||||
'enum' => StringSettingValidator
|
||||
'enum' => StringSettingValidator,
|
||||
'regex' => RegexSettingValidator
|
||||
}
|
||||
@validator_mapping[type_name]
|
||||
end
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
# Validates site settings of type `regex`.
#
# A value is accepted when it is blank, or when it compiles as a regular
# expression whose first match against a sample paragraph does not cover
# (almost) the whole paragraph.
class RegexSettingValidator

  # Sample text used to detect patterns that match far too much.
  LOREM = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam eget sem non elit tincidunt rhoncus.'.freeze

  # @param opts [Hash] validator options; stored but not otherwise used here
  def initialize(opts={})
    @opts = opts
  end

  # @param val [String, nil] the proposed setting value
  # @return [Boolean] true for blank values and for acceptable patterns
  def valid_value?(val)
    !val.present? || valid_regex?(val)
  end

  # Check that string is a valid regex, and that it doesn't match most of the lorem string.
  #
  # The pattern is compiled case-insensitively because the censoring code
  # applies it with the case-insensitive flag; a case-sensitive check would
  # let a pattern such as `[a-z ,.]*` through even though it swallows the
  # whole sample once case is ignored.
  #
  # @param val [String] regular expression source
  # @return [Boolean] false when the pattern is invalid or matches too much
  def valid_regex?(val)
    r = Regexp.new(val, Regexp::IGNORECASE)
    matches = r.match(LOREM)
    matches.nil? || matches[0].length < (LOREM.length - 10)
  rescue
    # Any failure to compile or apply the pattern means it is not usable.
    false
  end

  # @return [String] translated message shown when validation fails
  def error_message
    I18n.t('site_settings.errors.invalid_regex')
  end
end
|
|
@ -438,4 +438,13 @@ HTML
|
|||
end
|
||||
end
|
||||
|
||||
describe "censored_pattern site setting" do
  it "can be cleared if it causes cooking to timeout" do
    SiteSetting.censored_pattern = "evilregex"
    # Every call to markdown raises, so the retry inside cook raises as well.
    described_class.stubs(:markdown).raises(MiniRacer::ScriptTerminatedError)

    begin
      PrettyText.cook("Protect against it plz.")
    rescue StandardError
      nil
    end

    # The setting must have been cleared even though cooking failed.
    expect(SiteSetting.censored_pattern).to be_blank
  end
end
|
||||
|
||||
end
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
require 'rails_helper'
|
||||
|
||||
describe RegexSettingValidator do
  describe '#valid_value?' do
    subject(:validator) { described_class.new }

    # Blank values are accepted so the setting can be cleared.
    it "returns true for blank values" do
      ['', nil].each do |blank|
        expect(validator.valid_value?(blank)).to eq(true)
      end
    end

    # Unbalanced parentheses do not compile.
    it "return false for invalid regex" do
      result = validator.valid_value?('(()')
      expect(result).to eq(false)
    end

    # A pattern that matches the whole sample text is rejected.
    it "returns false for regex with dangerous matches" do
      result = validator.valid_value?('(.)*')
      expect(result).to eq(false)
    end

    # A narrow pattern (phone-number-like digits) is accepted.
    it "returns true for safe regex" do
      result = validator.valid_value?('\d{3}-\d{4}')
      expect(result).to eq(true)
    end
  end
end
|
|
@ -11,7 +11,8 @@ const defaultOpts = buildOptions({
|
|||
emoji_set: 'emoji_one',
|
||||
highlighted_languages: 'json|ruby|javascript',
|
||||
default_code_lang: 'auto',
|
||||
censored_words: 'shucks|whiz|whizzer'
|
||||
censored_words: 'shucks|whiz|whizzer',
|
||||
censored_pattern: '\\d{3}-\\d{4}|tech\\w*'
|
||||
},
|
||||
getURL: url => url
|
||||
});
|
||||
|
@ -532,6 +533,9 @@ test("censoring", function() {
|
|||
cooked("The link still works. [whiz](http://www.whiz.com)",
|
||||
"<p>The link still works. <a href=\"http://www.whiz.com\">■■■■</a></p>",
|
||||
"it won't break links by censoring them.");
|
||||
cooked("Call techapj the computer whiz at 555-555-1234 for free help.",
|
||||
"<p>Call ■■■■■■■ the computer ■■■■ at 555-■■■■■■■■ for free help.</p>",
|
||||
"uses both censored words and patterns from site settings");
|
||||
});
|
||||
|
||||
test("code blocks/spans hoisting", function() {
|
||||
|
|
Loading…
Reference in New Issue