FIX: Add word boundaries to replace and tag watched words (#13405)
The generated regular expressions did not contain word boundaries (\b), so they matched any text that contained the word, even when it was only a substring of a longer word. For example, if "art" was a watched word, a post containing the word "artist" matched.
This commit is contained in:
parent
4afd8f9bdf
commit
74f7295631
|
@ -118,7 +118,6 @@ acceptance("Admin - Watched Words - Bad regular expressions", function (needs) {
|
|||
action: "block",
|
||||
},
|
||||
],
|
||||
regular_expressions: true,
|
||||
compiled_regular_expressions: {
|
||||
block: null,
|
||||
censor: null,
|
||||
|
|
|
@ -11,14 +11,14 @@ export default {
|
|||
{
|
||||
id: 7,
|
||||
word: "hi",
|
||||
regexp: "hi",
|
||||
regexp: "(hi)",
|
||||
replacement: "hello",
|
||||
action: "replace",
|
||||
},
|
||||
{
|
||||
id: 8,
|
||||
word: "hello",
|
||||
regexp: "hello",
|
||||
regexp: "(hello)",
|
||||
replacement: "greeting",
|
||||
action: "tag",
|
||||
},
|
||||
|
|
|
@ -1675,21 +1675,21 @@ var bar = 'bar';
|
|||
|
||||
test("watched words replace", function (assert) {
|
||||
const opts = {
|
||||
watchedWordsReplace: { fun: "times" },
|
||||
watchedWordsReplace: { "(?:\\W|^)(fun)(?=\\W|$)": "times" },
|
||||
};
|
||||
|
||||
assert.cookedOptions("test fun", opts, "<p>test times</p>");
|
||||
assert.cookedOptions("test fun funny", opts, "<p>test times funny</p>");
|
||||
});
|
||||
|
||||
test("watched words link", function (assert) {
|
||||
const opts = {
|
||||
watchedWordsLink: { fun: "https://discourse.org" },
|
||||
watchedWordsLink: { "(?:\\W|^)(fun)(?=\\W|$)": "https://discourse.org" },
|
||||
};
|
||||
|
||||
assert.cookedOptions(
|
||||
"test fun",
|
||||
"test fun funny",
|
||||
opts,
|
||||
'<p>test <a href="https://discourse.org">fun</a></p>'
|
||||
'<p>test <a href="https://discourse.org">fun</a> funny</p>'
|
||||
);
|
||||
});
|
||||
|
||||
|
@ -1697,7 +1697,7 @@ var bar = 'bar';
|
|||
const maxMatches = 100; // same limit as MD watched-words-replace plugin
|
||||
const opts = {
|
||||
siteSettings: { watched_words_regular_expressions: true },
|
||||
watchedWordsReplace: { "\\bu?\\b": "you" },
|
||||
watchedWordsReplace: { "(\\bu?\\b)": "you" },
|
||||
};
|
||||
|
||||
assert.cookedOptions(
|
||||
|
|
|
@ -20,8 +20,8 @@ function findAllMatches(text, matchers) {
|
|||
count++ < MAX_MATCHES
|
||||
) {
|
||||
matches.push({
|
||||
index: match.index,
|
||||
text: match[0],
|
||||
index: match.index + match[0].indexOf(match[1]),
|
||||
text: match[1],
|
||||
replacement: matcher.replacement,
|
||||
link: matcher.link,
|
||||
});
|
||||
|
|
|
@ -4,7 +4,7 @@ class WatchedWordSerializer < ApplicationSerializer
|
|||
attributes :id, :word, :regexp, :replacement, :action
|
||||
|
||||
def regexp
|
||||
WordWatcher.word_to_regexp(word)
|
||||
WordWatcher.word_to_regexp(word, whole: true)
|
||||
end
|
||||
|
||||
def action
|
||||
|
|
|
@ -54,17 +54,26 @@ class WordWatcher
|
|||
|
||||
def self.word_matcher_regexps(action)
|
||||
if words = get_cached_words(action)
|
||||
words.map { |w, r| [word_to_regexp(w), r] }.to_h
|
||||
words.map { |w, r| [word_to_regexp(w, whole: true), r] }.to_h
|
||||
end
|
||||
end
|
||||
|
||||
def self.word_to_regexp(word)
|
||||
def self.word_to_regexp(word, whole: false)
|
||||
if SiteSetting.watched_words_regular_expressions?
|
||||
# Strip ruby regexp format if present, we're going to make the whole thing
|
||||
# case insensitive anyway
|
||||
return word.start_with?("(?-mix:") ? word[7..-2] : word
|
||||
regexp = word.start_with?("(?-mix:") ? word[7..-2] : word
|
||||
regexp = "(#{regexp})" if whole
|
||||
return regexp
|
||||
end
|
||||
Regexp.escape(word).gsub("\\*", '\S*')
|
||||
|
||||
regexp = Regexp.escape(word).gsub("\\*", '\S*')
|
||||
|
||||
if whole && !SiteSetting.watched_words_regular_expressions?
|
||||
regexp = "(?:\\W|^)(#{regexp})(?=\\W|$)"
|
||||
end
|
||||
|
||||
regexp
|
||||
end
|
||||
|
||||
def self.word_matcher_regexp_key(action)
|
||||
|
@ -144,6 +153,6 @@ class WordWatcher
|
|||
end
|
||||
|
||||
def word_matches?(word)
|
||||
Regexp.new(WordWatcher.word_to_regexp(word), Regexp::IGNORECASE).match?(@raw)
|
||||
Regexp.new(WordWatcher.word_to_regexp(word, whole: true), Regexp::IGNORECASE).match?(@raw)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -502,13 +502,21 @@ describe PostCreator do
|
|||
end
|
||||
|
||||
context "without regular expressions" do
|
||||
it "works" do
|
||||
it "works with many tags" do
|
||||
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "HELLO", replacement: "greetings , hey")
|
||||
|
||||
@post = creator.create
|
||||
expect(@post.topic.tags.map(&:name)).to match_array(['greetings', 'hey'])
|
||||
end
|
||||
|
||||
it "works with overlapping words" do
|
||||
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "art", replacement: "about-art")
|
||||
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "artist*", replacement: "about-artists")
|
||||
|
||||
post = PostCreator.new(user, title: "hello world topic", raw: "this is topic abour artists", archetype_id: 1).create
|
||||
expect(post.topic.tags.map(&:name)).to match_array(['about-artists'])
|
||||
end
|
||||
|
||||
it "does not treat as regular expressions" do
|
||||
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "he(llo|y)", replacement: "greetings , hey")
|
||||
|
||||
|
|
|
@ -1420,6 +1420,10 @@ HTML
|
|||
expect(PrettyText.cook("Lorem ipsum dolor sittt amet")).to match_html(<<~HTML)
|
||||
<p>Lorem ipsum something else amet</p>
|
||||
HTML
|
||||
|
||||
expect(PrettyText.cook("Lorem ipsum xdolor sit amet")).to match_html(<<~HTML)
|
||||
<p>Lorem ipsum xdolor sit amet</p>
|
||||
HTML
|
||||
end
|
||||
|
||||
it "replaces words with links" do
|
||||
|
|
Loading…
Reference in New Issue