FIX: Add word boundaries to replace and tag watched words (#13405)

The generated regular expressions did not contain word boundaries (\b),
so they matched any text that contained the watched word, even when it
was only a substring of a longer word.

For example, if "art" was a watched word, a post containing the word
"artist" also matched.
This commit is contained in:
Bianca Nenciu 2021-06-18 18:54:06 +03:00 committed by GitHub
parent 4afd8f9bdf
commit 74f7295631
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 38 additions and 18 deletions

View File

@ -118,7 +118,6 @@ acceptance("Admin - Watched Words - Bad regular expressions", function (needs) {
action: "block",
},
],
regular_expressions: true,
compiled_regular_expressions: {
block: null,
censor: null,

View File

@ -11,14 +11,14 @@ export default {
{
id: 7,
word: "hi",
regexp: "hi",
regexp: "(hi)",
replacement: "hello",
action: "replace",
},
{
id: 8,
word: "hello",
regexp: "hello",
regexp: "(hello)",
replacement: "greeting",
action: "tag",
},

View File

@ -1675,21 +1675,21 @@ var bar = 'bar';
test("watched words replace", function (assert) {
const opts = {
watchedWordsReplace: { fun: "times" },
watchedWordsReplace: { "(?:\\W|^)(fun)(?=\\W|$)": "times" },
};
assert.cookedOptions("test fun", opts, "<p>test times</p>");
assert.cookedOptions("test fun funny", opts, "<p>test times funny</p>");
});
test("watched words link", function (assert) {
const opts = {
watchedWordsLink: { fun: "https://discourse.org" },
watchedWordsLink: { "(?:\\W|^)(fun)(?=\\W|$)": "https://discourse.org" },
};
assert.cookedOptions(
"test fun",
"test fun funny",
opts,
'<p>test <a href="https://discourse.org">fun</a></p>'
'<p>test <a href="https://discourse.org">fun</a> funny</p>'
);
});
@ -1697,7 +1697,7 @@ var bar = 'bar';
const maxMatches = 100; // same limit as MD watched-words-replace plugin
const opts = {
siteSettings: { watched_words_regular_expressions: true },
watchedWordsReplace: { "\\bu?\\b": "you" },
watchedWordsReplace: { "(\\bu?\\b)": "you" },
};
assert.cookedOptions(

View File

@ -20,8 +20,8 @@ function findAllMatches(text, matchers) {
count++ < MAX_MATCHES
) {
matches.push({
index: match.index,
text: match[0],
index: match.index + match[0].indexOf(match[1]),
text: match[1],
replacement: matcher.replacement,
link: matcher.link,
});

View File

@ -4,7 +4,7 @@ class WatchedWordSerializer < ApplicationSerializer
attributes :id, :word, :regexp, :replacement, :action
def regexp
WordWatcher.word_to_regexp(word)
WordWatcher.word_to_regexp(word, whole: true)
end
def action

View File

@ -54,17 +54,26 @@ class WordWatcher
def self.word_matcher_regexps(action)
if words = get_cached_words(action)
words.map { |w, r| [word_to_regexp(w), r] }.to_h
words.map { |w, r| [word_to_regexp(w, whole: true), r] }.to_h
end
end
def self.word_to_regexp(word)
def self.word_to_regexp(word, whole: false)
if SiteSetting.watched_words_regular_expressions?
# Strip ruby regexp format if present, we're going to make the whole thing
# case insensitive anyway
return word.start_with?("(?-mix:") ? word[7..-2] : word
regexp = word.start_with?("(?-mix:") ? word[7..-2] : word
regexp = "(#{regexp})" if whole
return regexp
end
Regexp.escape(word).gsub("\\*", '\S*')
regexp = Regexp.escape(word).gsub("\\*", '\S*')
if whole && !SiteSetting.watched_words_regular_expressions?
regexp = "(?:\\W|^)(#{regexp})(?=\\W|$)"
end
regexp
end
def self.word_matcher_regexp_key(action)
@ -144,6 +153,6 @@ class WordWatcher
end
def word_matches?(word)
Regexp.new(WordWatcher.word_to_regexp(word), Regexp::IGNORECASE).match?(@raw)
Regexp.new(WordWatcher.word_to_regexp(word, whole: true), Regexp::IGNORECASE).match?(@raw)
end
end

View File

@ -502,13 +502,21 @@ describe PostCreator do
end
context "without regular expressions" do
it "works" do
it "works with many tags" do
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "HELLO", replacement: "greetings , hey")
@post = creator.create
expect(@post.topic.tags.map(&:name)).to match_array(['greetings', 'hey'])
end
it "works with overlapping words" do
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "art", replacement: "about-art")
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "artist*", replacement: "about-artists")
post = PostCreator.new(user, title: "hello world topic", raw: "this is topic abour artists", archetype_id: 1).create
expect(post.topic.tags.map(&:name)).to match_array(['about-artists'])
end
it "does not treat as regular expressions" do
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "he(llo|y)", replacement: "greetings , hey")

View File

@ -1420,6 +1420,10 @@ HTML
expect(PrettyText.cook("Lorem ipsum dolor sittt amet")).to match_html(<<~HTML)
<p>Lorem ipsum something else amet</p>
HTML
expect(PrettyText.cook("Lorem ipsum xdolor sit amet")).to match_html(<<~HTML)
<p>Lorem ipsum xdolor sit amet</p>
HTML
end
it "replaces words with links" do