discourse/app/models/watched_word.rb

# frozen_string_literal: true

# A single watched word: the text to look for (`word`), what to do when it is
# found (`action`, one of the values in WatchedWord.actions) and, for the
# actions listed in WatchedWord.has_replacement?, a `replacement` value.
class WatchedWord < ActiveRecord::Base
  # Maximum number of watched words that may be stored for one action.
  MAX_WORDS_PER_ACTION = 2000

  # Always store the normalized form of the word (see .normalize_word).
  before_validation { self.word = WatchedWord.normalize_word(self.word) }

  # For the :link action, a replacement that does not start with http:// or
  # https:// is prefixed with Discourse.base_url (inserting a "/" separator
  # when the replacement does not already start with one).
  before_validation do
    if self.action == WatchedWord.actions[:link] && self.replacement !~ %r{\Ahttps?://}
      self.replacement =
        "#{Discourse.base_url}#{self.replacement&.starts_with?("/") ? "" : "/"}#{self.replacement}"
    end
  end

  validates :word, presence: true, uniqueness: true, length: { maximum: 100 }
  validates :action, presence: true
  validate :replacement_is_url, if: -> { action == WatchedWord.actions[:link] }
  validate :replacement_is_tag_list, if: -> { action == WatchedWord.actions[:tag] }

  # Enforces MAX_WORDS_PER_ACTION. The record being validated is excluded from
  # the count so that a word which is already saved can still be updated when
  # its action is exactly at the limit. For a new record `id` is nil, so the
  # exclusion removes nothing and every stored word of that action is counted.
  validates_each :word do |record, _attr, _val|
    others = WatchedWord.where(action: record.action).where.not(id: record.id)
    record.errors.add(:word, :too_many) if others.count >= MAX_WORDS_PER_ACTION
  end

  # Any change to the stored words clears the WordWatcher cache.
  after_save -> { WordWatcher.clear_cache! }
  after_destroy -> { WordWatcher.clear_cache! }

  # Rows matching `word`: compared with ILIKE against rows whose
  # case_sensitive flag is false and with LIKE against rows whose flag is true.
  #
  # NOTE(review): `word` is used as the LIKE/ILIKE pattern as-is, so any
  # pattern metacharacters it contains (`%`, `_`, `\`) are not escaped.
  # Confirm whether it should go through sanitize_sql_like first.
  scope :for,
        ->(word:) do
          where(
            "(word ILIKE :word AND case_sensitive = 'f') OR (word LIKE :word AND case_sensitive = 't')",
            word: word,
          )
        end

  # Mapping of action names to the integer stored in the `action` column.
  # Built once and memoized on the class.
  def self.actions
    @actions ||=
      Enum.new(
        block: 1,
        censor: 2,
        require_approval: 3,
        flag: 4,
        link: 8,
        replace: 5,
        tag: 6,
        silence: 7,
      )
  end

  # Finds the watched word matching params[:word] (via the `for` scope) or
  # builds a new one, applies the given attributes and saves it.
  #
  # Recognized keys: :word, :replacement, :action_key, :action and
  # :case_sensitive. :replacement, :action_key and :action are only applied
  # when truthy; :case_sensitive is applied whenever it is not nil, so an
  # explicit `false` is honored.
  #
  # Returns the record. The result of `save` is not checked here, so callers
  # have to inspect the returned record (e.g. its errors) to learn whether it
  # was persisted.
  def self.create_or_update_word(params)
    word = normalize_word(params[:word])
    word = self.for(word: word).first_or_initialize(word: word)
    word.replacement = params[:replacement] if params[:replacement]
    word.action_key = params[:action_key] if params[:action_key]
    word.action = params[:action] if params[:action]
    word.case_sensitive = params[:case_sensitive] if !params[:case_sensitive].nil?
    word.save
    word
  end

  # True when `action` (a Symbol) is one of the actions that carry a
  # replacement value: :replace, :tag or :link.
  def self.has_replacement?(action)
    action == :replace || action == :tag || action == :link
  end

  # Normalizes a watched word before it is stored or looked up: unwraps the
  # "(?-mix:...)" wrapper, strips surrounding whitespace and collapses runs of
  # "*" into a single "*".
  #
  # This is a public class method: it is called with an explicit receiver from
  # the before_validation callback, and `private` has no effect on methods
  # defined with `def self.` anyway.
  def self.normalize_word(word)
    # Pass nil through untouched so that the presence validation reports the
    # missing word instead of this method raising NoMethodError.
    return word if word.nil?

    # When a regular expression is converted to a string, it is wrapped with
    # '(?-mix:' and ')'
    word = word[7..-2] if word.start_with?("(?-mix:")

    word.strip.squeeze("*")
  end

  # Sets `action` from its name (anything responding to to_sym, e.g. "block"
  # or :block) by looking the name up in WatchedWord.actions.
  def action_key=(arg)
    self.action = WatchedWord.actions[arg.to_sym]
  end

  # Text describing this word for logging: "word → replacement" when a
  # replacement is present, otherwise just the word.
  def action_log_details
    if replacement.present?
      "#{word} → #{replacement}"
    else
      word
    end
  end

  private

  # Adds :invalid_url to the record's base errors unless `replacement`
  # matches URI.regexp.
  def replacement_is_url
    errors.add(:base, :invalid_url) if replacement !~ URI.regexp
  end

  # Adds :invalid_tag_list to the record's base errors unless `replacement`
  # is a non-empty comma-separated list of names and the number of Tag rows
  # found by those names equals the number of names in the list.
  def replacement_is_tag_list
    tag_list = replacement&.split(",")
    tags = Tag.where(name: tag_list)
    if tag_list.blank? || tags.empty? || tag_list.size != tags.size
      errors.add(:base, :invalid_tag_list)
    end
  end
end

# == Schema Information
#
# Table name: watched_words
#
#  id             :integer          not null, primary key
#  word           :string           not null
#  action         :integer          not null
#  created_at     :datetime         not null
#  updated_at     :datetime         not null
#  replacement    :string
#  case_sensitive :boolean          default(FALSE), not null
#
# Indexes
#
#  index_watched_words_on_action_and_word  (action,word) UNIQUE
#
DEV: enable frozen string literal on all files This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging 2019-05-02 18:17:27 -04:00			`# frozen_string_literal: true`

FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`class WatchedWord < ActiveRecord::Base`
Have doubled the size of the Watched Words Per Action Limit 2020-12-16 10:13:15 -05:00			`MAX_WORDS_PER_ACTION = 2000`
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00
DEV: Refactor watched words (#24163) - Ignore only invalid words, not all words if one of them is invalid - The naming scheme for methods was inconsistent - Optimize regular expressions 2023-11-01 10:41:10 -04:00			`before_validation { self.word = WatchedWord.normalize_word(self.word) }`

FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`before_validation do`
DEV: Refactor watched words (#24163) - Ignore only invalid words, not all words if one of them is invalid - The naming scheme for methods was inconsistent - Optimize regular expressions 2023-11-01 10:41:10 -04:00			`if self.action == WatchedWord.actions[:link] && self.replacement !~ %r{\Ahttps?://}`
DEV: Add test for link watched words (#13251) 2021-06-02 21:36:07 -04:00			`self.replacement =`
			`"#{Discourse.base_url}#{self.replacement&.starts_with?("/") ? "" : "/"}#{self.replacement}"`
FIX: Split link watched words from replace (#13196) It was not clear that replace watched words can be used to replace text with URLs. This introduces a new watched word type that makes it easier to understand. 2021-06-02 01:36:49 -04:00			`end`
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`end`

FEATURE: Increase maximum watched word length from 50 to 100 chars (#11437) This is useful for more complex regex watched words https://meta.discourse.org/t/166249 2020-12-09 01:45:34 -05:00			`validates :word, presence: true, uniqueness: true, length: { maximum: 100 }`
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`validates :action, presence: true`
FIX: Split link watched words from replace (#13196) It was not clear that replace watched words can be used to replace text with URLs. This introduces a new watched word type that makes it easier to understand. 2021-06-02 01:36:49 -04:00			`validate :replacement_is_url, if: -> { action == WatchedWord.actions[:link] }`
FEATURE: Validate tags in WatchedWords (#17254) * FEATURE: Validate tags in WatchedWords We didn't validate watched words automatic tagging, so it was possible for an admin to created watched words with an empty tag list which would result in an exception when users tried to create a new topic that matched the misconfigured watched word. Bug report: https://meta.discourse.org/t/lib-topic-creator-fails-when-the-word-math-appears-in-the-topic-title-or-text/231018?u=falco 2022-06-27 15:16:33 -04:00			`validate :replacement_is_tag_list, if: -> { action == WatchedWord.actions[:tag] }`
FIX: Split link watched words from replace (#13196) It was not clear that replace watched words can be used to replace text with URLs. This introduces a new watched word type that makes it easier to understand. 2021-06-02 01:36:49 -04:00
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`validates_each :word do \|record, attr, val\|`
			`if WatchedWord.where(action: record.action).count >= MAX_WORDS_PER_ACTION`
			`record.errors.add(:word, :too_many)`
			`end`
			`end`

DEV: Refactor watched words (#24163) - Ignore only invalid words, not all words if one of them is invalid - The naming scheme for methods was inconsistent - Optimize regular expressions 2023-11-01 10:41:10 -04:00			`after_save -> { WordWatcher.clear_cache! }`
			`after_destroy -> { WordWatcher.clear_cache! }`
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00
FIX: Allow to add the same watched word with a different case (#17799) Currently we can’t add a case-sensitive watched word if another one exists with a different case. For example, the existing watched word `Meta` has been created and is case-sensitive. Now an admin tries to add `metA` while marking it as case-sensitive too, this won’t work and the word won’t be added. This patch changes this behavior by allowing to add same words that have different cases, so the example above will now work as expected. We still check for uniqueness but case-sensitivy is now taken into account. It means that if the watched word `meta` already exists and is not case-sensitive then it will not be possible to add `Meta` (case-sensitive or not) as `meta` already matches every possible variations of this word. 2022-08-05 06:18:17 -04:00			`scope :for,`
DEV: update syntax tree to latest (#24623) update format to latest syntax tree 2023-11-29 00:38:07 -05:00			`->(word:) do`
FIX: Allow to add the same watched word with a different case (#17799) Currently we can’t add a case-sensitive watched word if another one exists with a different case. For example, the existing watched word `Meta` has been created and is case-sensitive. Now an admin tries to add `metA` while marking it as case-sensitive too, this won’t work and the word won’t be added. This patch changes this behavior by allowing to add same words that have different cases, so the example above will now work as expected. We still check for uniqueness but case-sensitivy is now taken into account. It means that if the watched word `meta` already exists and is not case-sensitive then it will not be possible to add `Meta` (case-sensitive or not) as `meta` already matches every possible variations of this word. 2022-08-05 06:18:17 -04:00			`where(`
			`"(word ILIKE :word AND case_sensitive = 'f') OR (word LIKE :word AND case_sensitive = 't')",`
			`word: word,`
			`)`
DEV: update syntax tree to latest (#24623) update format to latest syntax tree 2023-11-29 00:38:07 -05:00			`end`
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00
DEV: Refactor watched words (#24163) - Ignore only invalid words, not all words if one of them is invalid - The naming scheme for methods was inconsistent - Optimize regular expressions 2023-11-01 10:41:10 -04:00			`def self.actions`
			`@actions \|\|=`
			`Enum.new(`
			`block: 1,`
			`censor: 2,`
			`require_approval: 3,`
			`flag: 4,`
			`link: 8,`
			`replace: 5,`
			`tag: 6,`
			`silence: 7,`
			`)`
FEATURE: Validate tags in WatchedWords (#17254) * FEATURE: Validate tags in WatchedWords We didn't validate watched words automatic tagging, so it was possible for an admin to created watched words with an empty tag list which would result in an exception when users tried to create a new topic that matched the misconfigured watched word. Bug report: https://meta.discourse.org/t/lib-topic-creator-fails-when-the-word-math-appears-in-the-topic-title-or-text/231018?u=falco 2022-06-27 15:16:33 -04:00			`end`

FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`def self.create_or_update_word(params)`
DEV: Refactor watched words (#24163) - Ignore only invalid words, not all words if one of them is invalid - The naming scheme for methods was inconsistent - Optimize regular expressions 2023-11-01 10:41:10 -04:00			`word = normalize_word(params[:word])`
			`word = self.for(word: word).first_or_initialize(word: word)`
			`word.replacement = params[:replacement] if params[:replacement]`
			`word.action_key = params[:action_key] if params[:action_key]`
			`word.action = params[:action] if params[:action]`
			`word.case_sensitive = params[:case_sensitive] if !params[:case_sensitive].nil?`
			`word.save`
			`word`
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`end`

FEATURE: Import and export watched word (#12444) Find & Replace and Autotag watched words were not completely exported and import did not work with these either. This commit changes the input and output format to CSV, which allows for a secondary column. This change is backwards compatible because a CSV file with only one column has one value per line. 2021-03-22 16:32:18 -04:00			`def self.has_replacement?(action)`
FIX: Split link watched words from replace (#13196) It was not clear that replace watched words can be used to replace text with URLs. This introduces a new watched word type that makes it easier to understand. 2021-06-02 01:36:49 -04:00			`action == :replace \|\| action == :tag \|\| action == :link`
FEATURE: Import and export watched word (#12444) Find & Replace and Autotag watched words were not completely exported and import did not work with these either. This commit changes the input and output format to CSV, which allows for a secondary column. This change is backwards compatible because a CSV file with only one column has one value per line. 2021-03-22 16:32:18 -04:00			`end`

FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`def action_key=(arg)`
DEV: Refactor watched words (#24163) - Ignore only invalid words, not all words if one of them is invalid - The naming scheme for methods was inconsistent - Optimize regular expressions 2023-11-01 10:41:10 -04:00			`self.action = WatchedWord.actions[arg.to_sym]`
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`end`

FEATURE: add staff action logs for watched words (#13574) 2021-06-30 01:52:46 -04:00			`def action_log_details`
			`if replacement.present?`
			`"#{word} → #{replacement}"`
			`else`
			`word`
			`end`
			`end`

DEV: Refactor watched words (#24163) - Ignore only invalid words, not all words if one of them is invalid - The naming scheme for methods was inconsistent - Optimize regular expressions 2023-11-01 10:41:10 -04:00			`private`

			`def self.normalize_word(word)`
			`# When a regular expression is converted to a string, it is wrapped with`
			`# '(?-mix:' and ')'`
			`word = word[7..-2] if word.start_with?("(?-mix:")`

			`word.strip.squeeze("*")`
			`end`

			`def replacement_is_url`
			`errors.add(:base, :invalid_url) if replacement !~ URI.regexp`
			`end`

			`def replacement_is_tag_list`
			`tag_list = replacement&.split(",")`
			`tags = Tag.where(name: tag_list)`
			`if tag_list.blank? \|\| tags.empty? \|\| tag_list.size != tags.size`
			`errors.add(:base, :invalid_tag_list)`
			`end`
FEATURE: Watched Words: when posts contain words, do one of flag, require approval, censor, or block 2017-06-28 16:56:44 -04:00			`end`
			`end`
FEATURE: Add group settngs to allow users to leave a group freely. https://meta.discourse.org/t/split-join-leave-freely-setting-on-groups/65565 2017-07-27 22:37:10 -04:00
			`# == Schema Information`
			`#`
			`# Table name: watched_words`
			`#`
FEATURE: Add support for case-sensitive Watched Words (#17445) * FEATURE: Add case-sensitivity flag to watched_words Currently, all watched words are matched case-insensitively. This flag allows a watched word to be flagged for case-sensitive matching. To allow allow for backwards compatibility the flag is set to false by default. * FEATURE: Support case-sensitive creation of Watched Words via API Extend admin creation and upload of Watched Words to support case sensitive flag. This lays the ground work for supporting case-insensitive matching of Watched Words. Support for an extra column has also been introduced for the Watched Words upload CSV file. The new column structure is as follows: word,replacement,case_sentive * FEATURE: Enable case-sensitive matching of Watched Words WordWatcher's word_matcher_regexp now returns a list of regular expressions instead of one case-insensitive regular expression. With the ability to flag a Watched Word as case-sensitive, an action can have words of both sensitivities.This makes the use of the global Regexp::IGNORECASE flag added to all words problematic. To get around platform limitations around the use of subexpression level switches/flags, a list of regular expressions is returned instead, one for each case sensitivity. Word matching has also been updated to use this list of regular expressions instead of one. * FEATURE: Use case-sensitive regular expressions for Watched Words Update Watched Words regular expressions matching and processing to handle the extra metadata which comes along with the introduction of case-sensitive Watched Words. This allows case-sensitive Watched Words to matched as such. * DEV: Simplify type casting of case-sensitive flag from uploads Use builtin semantics instead of a custom method for converting string case flags in uploaded Watched Words to boolean. * UX: Add case-sensitivity details to Admin Watched Words UI Update Watched Word form to include a toggle for case-sensitivity. 
This also adds support for, case-sensitive testing and matching of Watched Word in the admin UI. * DEV: Code improvements from review feedback - Extract watched word regex creation out to a utility function - Make JS array presence check more explicit and readable * DEV: Extract Watched Word regex creation to utility function Clean-up work from review feedback. Reduce code duplication. * DEV: Rename word_matcher_regexp to word_matcher_regexp_list Since a list is returned now instead of a single regular expression, change `word_matcher_regexp` to `word_matcher_regexp_list` to better communicate this change. * DEV: Incorporate WordWatcher updates from upstream Resolve conflicts and ensure apply_to_text does not remove non-word characters in matches that aren't at the beginning of the line. 2022-08-02 04:06:03 -04:00			`# id :integer not null, primary key`
			`# word :string not null`
			`# action :integer not null`
			`# created_at :datetime not null`
			`# updated_at :datetime not null`
			`# replacement :string`
			`# case_sensitive :boolean default(FALSE), not null`
FEATURE: Add group settngs to allow users to leave a group freely. https://meta.discourse.org/t/split-join-leave-freely-setting-on-groups/65565 2017-07-27 22:37:10 -04:00			`#`
			`# Indexes`
			`#`
			`# index_watched_words_on_action_and_word (action,word) UNIQUE`
			`#`