diff --git a/app/assets/javascripts/pretty-text/censored-words.js.es6 b/app/assets/javascripts/pretty-text/censored-words.js.es6 index a715b757ec8..e81032393f8 100644 --- a/app/assets/javascripts/pretty-text/censored-words.js.es6 +++ b/app/assets/javascripts/pretty-text/censored-words.js.es6 @@ -1,19 +1,38 @@ -export function censor(text, censoredWords) { - if (censoredWords && censoredWords.length) { - const split = censoredWords.split("|"); - let censorRegexp; - if (split && split.length) { - censorRegexp = new RegExp("(\\b(?:" + split.map(function (t) { return "(" + t.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&') + ")"; }).join("|") + ")\\b)(?![^\\(]*\\))", "ig"); - } +export function censor(text, censoredWords, censoredPattern) { + let patterns = [], + originalText = text; - if (censorRegexp) { - let m = censorRegexp.exec(text); - while (m && m[0]) { - const replacement = new Array(m[0].length+1).join('■'); - text = text.replace(new RegExp("(\\b" + m[0] + "\\b)(?![^\\(]*\\))", "ig"), replacement); - m = censorRegexp.exec(text); + if (censoredWords && censoredWords.length) { + patterns = censoredWords.split("|").map(t => { return "(" + t.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&') + ")"; }); + } + + if (censoredPattern && censoredPattern.length > 0) { + try { + new RegExp(censoredPattern); // exception if invalid + patterns.push("(" + censoredPattern + ")"); + } catch(e) {} + } + + if (patterns.length) { + let censorRegexp; + + try { + censorRegexp = new RegExp("(\\b(?:" + patterns.join("|") + ")\\b)(?![^\\(]*\\))", "ig"); + + if (censorRegexp) { + let m = censorRegexp.exec(text); + + while (m && m[0]) { + if (m[0].length > originalText.length) { return originalText; } // regex is dangerous + const replacement = new Array(m[0].length+1).join('■'); + text = text.replace(new RegExp("(\\b" + m[0] + "\\b)(?![^\\(]*\\))", "ig"), replacement); + m = censorRegexp.exec(text); + } } + } catch(e) { + return originalText; } } + return text; } diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/censored.js.es6 b/app/assets/javascripts/pretty-text/engines/discourse-markdown/censored.js.es6 index ecb463f8c37..b0ac2e5acc5 100644 --- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/censored.js.es6 +++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/censored.js.es6 @@ -4,10 +4,12 @@ import { registerOption } from 'pretty-text/pretty-text'; registerOption((siteSettings, opts) => { opts.features.censored = true; opts.censoredWords = siteSettings.censored_words; + opts.censoredPattern = siteSettings.censored_pattern; }); export function setup(helper) { helper.addPreProcessor(text => { - return censor(text, helper.getOptions().censoredWords); + const options = helper.getOptions(); + return censor(text, options.censoredWords, options.censoredPattern); }); } diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index e693cdf043c..04b2e4d94e6 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -828,6 +828,7 @@ en: site_settings: censored_words: "Words that will be automatically replaced with ■■■■" + censored_pattern: "Regex pattern that will be automatically replaced with ■■■■" delete_old_hidden_posts: "Auto-delete any hidden posts that stay hidden for more than 30 days." default_locale: "The default language of this Discourse instance (ISO 639-1 Code)" allow_user_locale: "Allow users to choose their own language interface preference" @@ -1442,6 +1443,7 @@ en: reply_by_email_address_is_empty: "You must set a 'reply by email address' before enabling reply by email." email_polling_disabled: "You must enable either manual or POP3 polling before enabling reply by email." user_locale_not_enabled: "You must first enable 'allow user locale' before enabling this setting." + invalid_regex: "Regex is invalid or not allowed." search: within_post: "#%{post_number} by %{username}" diff --git a/config/site_settings.yml b/config/site_settings.yml index f93bc567096..92776b8eeae 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -536,6 +536,11 @@ posting: default: '' refresh: true type: list + censored_pattern: + client: true + default: '' + refresh: true + type: regex enable_emoji: default: true client: true diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index d3d6d4ed422..412ca02d3a2 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -212,7 +212,18 @@ module PrettyText options[:topicId] = opts[:topic_id] working_text = text.dup - sanitized = markdown(working_text, options) + + begin + sanitized = markdown(working_text, options) + rescue MiniRacer::ScriptTerminatedError => e + if SiteSetting.censored_pattern.present? + Rails.logger.warn "Post cooking timed out. Clearing the censored_pattern setting and retrying." + SiteSetting.censored_pattern = nil + sanitized = markdown(working_text, options) + else + raise e + end + end doc = Nokogiri::HTML.fragment(sanitized) diff --git a/lib/site_setting_extension.rb b/lib/site_setting_extension.rb index 3fb3b01550a..70e576f08de 100644 --- a/lib/site_setting_extension.rb +++ b/lib/site_setting_extension.rb @@ -32,7 +32,8 @@ module SiteSettingExtension url_list: 9, host_list: 10, category_list: 11, - value_list: 12) + value_list: 12, + regex: 13) end def mutex @@ -443,7 +444,8 @@ module SiteSettingExtension types[:fixnum] => IntegerSettingValidator, types[:string] => StringSettingValidator, 'list' => StringSettingValidator, - 'enum' => StringSettingValidator + 'enum' => StringSettingValidator, + 'regex' => RegexSettingValidator } @validator_mapping[type_name] end diff --git a/lib/validators/regex_setting_validator.rb b/lib/validators/regex_setting_validator.rb new file mode 100644 index 00000000000..e1aa9900b64 --- /dev/null +++ b/lib/validators/regex_setting_validator.rb @@ -0,0 +1,25 @@ +class RegexSettingValidator + + LOREM = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam eget sem non elit tincidunt rhoncus.'.freeze + + def initialize(opts={}) + @opts = opts + end + + def valid_value?(val) + !val.present? || valid_regex?(val) + end + + # Check that string is a valid regex, and that it doesn't match most of the lorem string. + def valid_regex?(val) + r = Regexp.new(val) + matches = r.match(LOREM) + matches.nil? || matches[0].length < (LOREM.length - 10) + rescue + false + end + + def error_message + I18n.t('site_settings.errors.invalid_regex') + end +end diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index 109181333fa..63577b90e9c 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -438,4 +438,13 @@ HTML end end + describe "censored_pattern site setting" do + it "can be cleared if it causes cooking to timeout" do + SiteSetting.censored_pattern = "evilregex" + described_class.stubs(:markdown).raises(MiniRacer::ScriptTerminatedError) + PrettyText.cook("Protect against it plz.") rescue nil + expect(SiteSetting.censored_pattern).to be_blank + end + end + end diff --git a/spec/components/validators/regex_setting_validator_spec.rb b/spec/components/validators/regex_setting_validator_spec.rb new file mode 100644 index 00000000000..b7bb82f24e1 --- /dev/null +++ b/spec/components/validators/regex_setting_validator_spec.rb @@ -0,0 +1,24 @@ +require 'rails_helper' + +describe RegexSettingValidator do + describe '#valid_value?' do + subject(:validator) { described_class.new } + + it "returns true for blank values" do + expect(validator.valid_value?('')).to eq(true) + expect(validator.valid_value?(nil)).to eq(true) + end + + it "return false for invalid regex" do + expect(validator.valid_value?('(()')).to eq(false) + end + + it "returns false for regex with dangerous matches" do + expect(validator.valid_value?('(.)*')).to eq(false) + end + + it "returns true for safe regex" do + expect(validator.valid_value?('\d{3}-\d{4}')).to eq(true) + end + end +end diff --git a/test/javascripts/lib/pretty-text-test.js.es6 b/test/javascripts/lib/pretty-text-test.js.es6 index fb9c04f0ed9..790967119a3 100644 --- a/test/javascripts/lib/pretty-text-test.js.es6 +++ b/test/javascripts/lib/pretty-text-test.js.es6 @@ -11,7 +11,8 @@ const defaultOpts = buildOptions({ emoji_set: 'emoji_one', highlighted_languages: 'json|ruby|javascript', default_code_lang: 'auto', - censored_words: 'shucks|whiz|whizzer' + censored_words: 'shucks|whiz|whizzer', + censored_pattern: '\\d{3}-\\d{4}|tech\\w*' }, getURL: url => url }); @@ -532,6 +533,9 @@ test("censoring", function() { cooked("The link still works. [whiz](http://www.whiz.com)", "

The link still works. ■■■■

", "it won't break links by censoring them."); + cooked("Call techapj the computer whiz at 555-555-1234 for free help.", + "

Call ■■■■■■■ the computer ■■■■ at 555-■■■■■■■■ for free help.

", + "uses both censored words and patterns from site settings"); }); test("code blocks/spans hoisting", function() {