FIX: Make replace watched words work with wildcard (#13084)

Watched words are always treated as regular expressions, regardless of whether watched_words_regular_expressions is enabled. Internally, wildcard characters are replaced with a regular expression that matches any non-whitespace character.
2021-05-18 12:09:47 +03:00 · 2021-05-18 12:09:47 +03:00 · c1dfd76658
parent a21700a444
commit c1dfd76658
5 changed files with 35 additions and 48 deletions
--- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/watched-words-replace.js
+++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/watched-words-replace.js
@ -6,50 +6,30 @@ function isLinkClose(str) {
  return /^<\/a\s*>/i.test(str);
 }

-function findAllMatches(text, matchers, useRegExp) {
+function findAllMatches(text, matchers) {
  const matches = [];

-  if (useRegExp) {
-    const maxMatches = 100;
-    let count = 0;
+  const maxMatches = 100;
+  let count = 0;

-    matchers.forEach((matcher) => {
-      let match;
-      while (
-        (match = matcher.pattern.exec(text)) !== null &&
-        count++ < maxMatches
-      ) {
-        matches.push({
-          index: match.index,
-          text: match[0],
-          replacement: matcher.replacement,
-        });
-      }
-    });
-  } else {
-    const lowerText = text.toLowerCase();
-    matchers.forEach((matcher) => {
-      const lowerPattern = matcher.pattern.toLowerCase();
-      let index = -1;
-      while ((index = lowerText.indexOf(lowerPattern, index + 1)) !== -1) {
-        matches.push({
-          index,
-          text: text.substr(index, lowerPattern.length),
-          replacement: matcher.replacement,
-        });
-      }
-    });
-  }
+  matchers.forEach((matcher) => {
+    let match;
+    while (
+      (match = matcher.pattern.exec(text)) !== null &&
+      count++ < maxMatches
+    ) {
+      matches.push({
+        index: match.index,
+        text: match[0],
+        replacement: matcher.replacement,
+      });
+    }
+  });

  return matches.sort((a, b) => a.index - b.index);
 }

 export function setup(helper) {
-  helper.registerOptions((opts, siteSettings) => {
-    opts.watchedWordsRegularExpressions =
-      siteSettings.watched_words_regular_expressions;
-  });
-
  helper.registerPlugin((md) => {
    const replacements = md.options.discourse.watchedWordsReplacements;
    if (!replacements) {
@ -57,9 +37,7 @@ export function setup(helper) {
    }

    const matchers = Object.keys(replacements).map((word) => ({
-      pattern: md.options.discourse.watchedWordsRegularExpressions
-        ? new RegExp(word, "gi")
-        : word,
+      pattern: new RegExp(word, "gi"),
      replacement: replacements[word],
    }));

@ -110,12 +88,7 @@ export function setup(helper) {
          if (currentToken.type === "text") {
            const text = currentToken.content;
            const matches = (cache[text] =
-              cache[text] ||
-              findAllMatches(
-                text,
-                matchers,
-                md.options.discourse.watchedWordsRegularExpressions
-              ));
+              cache[text] || findAllMatches(text, matchers));

            // Now split string to nodes
            const nodes = [];
--- a/app/serializers/site_serializer.rb
+++ b/app/serializers/site_serializer.rb
@ -187,7 +187,7 @@ class SiteSerializer < ApplicationSerializer
  end

  def watched_words_replace
-    WordWatcher.get_cached_words(:replace)
+    WordWatcher.word_matcher_regexps(:replace)
  end

  private
--- a/app/services/word_watcher.rb
+++ b/app/services/word_watcher.rb
@ -51,6 +51,12 @@ class WordWatcher
    nil # Admin will be alerted via admin_dashboard_data.rb
  end

+  def self.word_matcher_regexps(action)
+    if words = get_cached_words(action)
+      words.map { |w, r| [word_to_regexp(w), r] }.to_h
+    end
+  end
+
  def self.word_to_regexp(word)
    if SiteSetting.watched_words_regular_expressions?
      # Strip ruby regexp format if present, we're going to make the whole thing
--- a/lib/pretty_text.rb
+++ b/lib/pretty_text.rb
@ -173,7 +173,7 @@ module PrettyText
        __optInput.emojiUnicodeReplacer = __emojiUnicodeReplacer;
        __optInput.lookupUploadUrls = __lookupUploadUrls;
        __optInput.censoredRegexp = #{WordWatcher.word_matcher_regexp(:censor)&.source.to_json};
-        __optInput.watchedWordsReplacements = #{WordWatcher.get_cached_words(:replace).to_json};
+        __optInput.watchedWordsReplacements = #{WordWatcher.word_matcher_regexps(:replace).to_json};
      JS

      if opts[:topicId]
--- a/spec/components/pretty_text_spec.rb
+++ b/spec/components/pretty_text_spec.rb
@ -1401,11 +1401,19 @@ HTML
    after(:all) { Discourse.redis.flushdb }

    it "replaces words with other words" do
-      Fabricate(:watched_word, action: WatchedWord.actions[:replace], word: "dolor sit", replacement: "something else")
+      Fabricate(:watched_word, action: WatchedWord.actions[:replace], word: "dolor sit*", replacement: "something else")

      expect(PrettyText.cook("Lorem ipsum dolor sit amet")).to match_html(<<~HTML)
        <p>Lorem ipsum something else amet</p>
      HTML
+
+      expect(PrettyText.cook("Lorem ipsum dolor sits amet")).to match_html(<<~HTML)
+        <p>Lorem ipsum something else amet</p>
+      HTML
+
+      expect(PrettyText.cook("Lorem ipsum dolor sittt amet")).to match_html(<<~HTML)
+        <p>Lorem ipsum something else amet</p>
+      HTML
    end

    it "replaces words with links" do