discourse/app/services/search_indexer.rb

# frozen_string_literal: true
require_dependency 'search'

class SearchIndexer

  # Switches indexing off. While the class-level @disabled flag is true,
  # `index` and `queue_post_reindex` return immediately.
  def self.disable
    @disabled = true
  end

  # Switches indexing back on by clearing the class-level @disabled flag
  # that `index` and `queue_post_reindex` check before doing any work.
  def self.enable
    @disabled = false
  end

  # Converts cooked HTML into plain text suitable for indexing by delegating
  # to HtmlScrubber.scrub. Returns a String.
  def self.scrub_html_for_search(html)
    HtmlScrubber.scrub(html)
  end

  # Appends the dot-separated parts of dotted tokens as extra words so that
  # each part becomes searchable on its own.
  #
  #   "I.am.a.word" => "I.am.a.word am a word"
  #
  # raw - String of already-prepared search text.
  #
  # Returns a new String. Tokens without an inner/leading dot (e.g. "end.")
  # are left untouched.
  def self.inject_extra_terms(raw)
    raw.gsub(/[^[:space:]]*[\.]+[^[:space:]]*/) do |with_dot|
      # The first segment is skipped, matching the example above. Empty
      # segments (from "a..b" or ".a") are dropped so no blank terms or
      # doubled spaces end up in the indexed text.
      extra_terms = with_dot.split(".").drop(1).reject(&:empty?)

      extra_terms.empty? ? with_dot : "#{with_dot} #{extra_terms.join(' ')}"
    end
  end

  # Updates the search-data row for one record, inserting it when no row
  # exists yet.
  #
  # table    - singular prefix; the row lives in "#{table}_search_data" and is
  #            keyed by "#{table}_id".
  # id       - value of that key.
  # raw_data - Array of up to four strings (nil entries are treated as "").
  #            Element 0 is ranked with weight 'A', 1 with 'B', 2 with 'C'
  #            and 3 with 'D'.
  #
  # This is best effort: any StandardError is logged as a warning and
  # swallowed, and nil is returned in that case.
  def self.update_index(table: , id: , raw_data:)
    search_data = raw_data.map do |data|
      inject_extra_terms(Search.prepare_data(data || "", :index))
    end

    table_name = "#{table}_search_data"
    foreign_key = "#{table}_id"

    # for user login and name use "simple" lowercase stemmer
    stemmer = table == "user" ? "simple" : Search.ts_config

    # Missing positions are bound as NULL and coalesced to '' in SQL.
    ranked_index = <<~SQL
      setweight(to_tsvector('#{stemmer}', coalesce(:a,'')), 'A') ||
      setweight(to_tsvector('#{stemmer}', coalesce(:b,'')), 'B') ||
      setweight(to_tsvector('#{stemmer}', coalesce(:c,'')), 'C') ||
      setweight(to_tsvector('#{stemmer}', coalesce(:d,'')), 'D')
    SQL

    indexed_data = search_data.select { |d| d.length > 0 }.join(' ')

    params = {
      a: search_data[0],
      b: search_data[1],
      c: search_data[2],
      d: search_data[3],
      raw_data: indexed_data,
      id: id,
      locale: SiteSetting.default_locale,
      version: Search::INDEX_VERSION
    }

    # Would be nice to use AR here but not sure how to execute Postgres functions
    # when inserting data like this.
    rows = DB.exec(<<~SQL, params)
      UPDATE #{table_name}
      SET
         raw_data = :raw_data,
         locale = :locale,
         search_data = #{ranked_index},
         version = :version
      WHERE #{foreign_key} = :id
    SQL

    # No existing row was touched, so create it.
    # NOTE(review): update-then-insert is not atomic; two concurrent calls for
    # the same id could both reach the INSERT. Confirm whether that matters.
    if rows == 0
      DB.exec(<<~SQL, params)
        INSERT INTO #{table_name}
        (#{foreign_key}, search_data, locale, raw_data, version)
        VALUES (:id, #{ranked_index}, :locale, :raw_data, :version)
      SQL
    end
  rescue => e
    # TODO is there any way we can safely avoid this?
    # best way is probably pushing search indexer into a dedicated process so it no longer happens on save
    # instead in the post processor
    #
    # Still swallowed on purpose, but leave a trace so failures are visible.
    Rails.logger.warn("Failed to update #{table} search index for id #{id}: #{e.class}: #{e.message}")
    nil
  end

  # Indexes a topic in topic_search_data: the title is ranked as 'A' and the
  # scrubbed first-post body, cut to Topic::MAX_SIMILAR_BODY_LENGTH
  # characters, as 'B'.
  #
  # Note the weights differ from the post index, where the body is the
  # fourth entry and therefore ranked 'D'.
  def self.update_topics_index(topic_id, title, cooked)
    body_limit = Topic::MAX_SIMILAR_BODY_LENGTH
    truncated_body = scrub_html_for_search(cooked)[0...body_limit]

    update_index(
      table: 'topic',
      id: topic_id,
      raw_data: [title, truncated_body]
    )
  end

  # Indexes a post in post_search_data. Ranking order: topic title ('A'),
  # category name ('B'), tag names ('C'), scrubbed post body ('D').
  def self.update_posts_index(post_id, title, category, tags, cooked)
    body = scrub_html_for_search(cooked)
    ranked_fields = [title, category, tags, body]

    update_index(table: 'post', id: post_id, raw_data: ranked_fields)
  end

  # Indexes a user in user_search_data: username is ranked as 'A' and name
  # as 'B'. update_index uses the "simple" stemmer for the 'user' table.
  def self.update_users_index(user_id, username, name)
    update_index(table: 'user', id: user_id, raw_data: [username, name])
  end

  # Indexes a category in category_search_data; the name is the only
  # indexed field and is ranked as 'A'.
  def self.update_categories_index(category_id, name)
    update_index(table: 'category', id: category_id, raw_data: [name])
  end

  # Indexes a tag in tag_search_data; the name is the only indexed field
  # and is ranked as 'A'.
  def self.update_tags_index(tag_id, name)
    update_index(table: 'tag', id: tag_id, raw_data: [name])
  end

  # Resets `version` to 0 on the post_search_data rows of every post in the
  # given topic. Does nothing while indexing is disabled.
  # NOTE(review): presumably a version of 0 makes a separate reindex job pick
  # these rows up again; that job is not visible here, so confirm.
  #
  # Returns whatever DB.exec returns, or nil when disabled.
  def self.queue_post_reindex(topic_id)
    return if @disabled

    reset_version_sql = <<~SQL
      UPDATE post_search_data
      SET version = 0
      WHERE post_id IN (SELECT id FROM posts WHERE topic_id = :topic_id)
    SQL

    DB.exec(reset_version_sql, topic_id: topic_id)
  end

  # Entry point: refreshes the search index rows affected by a change to obj.
  #
  # obj   - a Post, Topic, User, Category or Tag; any other object is ignored.
  # force - when true, reindex even if the relevant attribute did not change
  #         in the last save.
  #
  # What gets reindexed:
  #   Post     - when `cooked` changed: the post row, plus the topic row when
  #              it is the first post. A post without a topic is logged and
  #              skipped.
  #   User     - when username or name changed.
  #   Topic    - when the title changed: the first post's row and the topic row.
  #   Category - when the name changed.
  #   Tag      - when the name changed.
  #
  # Does nothing while indexing is disabled. The topic, its category and its
  # tag names are looked up only inside the branches that actually reindex, so
  # a call that has nothing to do does not load them.
  def self.index(obj, force: false)
    return if @disabled

    if Post === obj && (obj.saved_change_to_cooked? || force)
      if topic = obj.topic
        category_name = topic.category&.name
        tag_names = topic.tags.pluck(:name).join(' ')

        SearchIndexer.update_posts_index(obj.id, topic.title, category_name, tag_names, obj.cooked)
        SearchIndexer.update_topics_index(topic.id, topic.title, obj.cooked) if obj.is_first_post?
      else
        Rails.logger.warn("Orphan post skipped in search_indexer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}")
      end
    end

    if User === obj && (obj.saved_change_to_username? || obj.saved_change_to_name? || force)
      SearchIndexer.update_users_index(obj.id, obj.username_lower || '', obj.name ? obj.name.downcase : '')
    end

    if Topic === obj && (obj.saved_change_to_title? || force)
      if obj.posts
        # The title is part of the first post's index row, so refresh both.
        if post = obj.posts.find_by(post_number: 1)
          category_name = obj.category&.name
          tag_names = obj.tags.pluck(:name).join(' ')

          SearchIndexer.update_posts_index(post.id, obj.title, category_name, tag_names, post.cooked)
          SearchIndexer.update_topics_index(obj.id, obj.title, post.cooked)
        end
      end
    end

    if Category === obj && (obj.saved_change_to_name? || force)
      SearchIndexer.update_categories_index(obj.id, obj.name)
    end

    if Tag === obj && (obj.saved_change_to_name? || force)
      SearchIndexer.update_tags_index(obj.id, obj.name)
    end
  end

  # SAX handler that flattens HTML into plain text for indexing. It collects
  # text nodes plus the values of a few attributes (see ATTRIBUTES), with
  # combining diacritical marks removed.
  class HtmlScrubber < Nokogiri::XML::SAX::Document
    # Attribute values that are indexed alongside the text content.
    ATTRIBUTES ||= %w{alt title href data-youtube-title}

    # Combining marks left behind once text is decomposed (NFD).
    DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/

    # The text accumulated so far.
    attr_reader :scrubbed

    # Parses html and returns the scrubbed text; blank input yields an empty,
    # mutable String without running the parser.
    def self.scrub(html)
      return +"" if html.blank?

      handler = new
      parser = Nokogiri::HTML::SAX::Parser.new(handler)
      # Wrapped in a single root element before parsing.
      parser.parse("<div>#{html}</div>")
      handler.scrubbed
    end

    def initialize
      @scrubbed = +""
    end

    # SAX callback for an opening tag: feeds the present values of the
    # indexed attributes through `characters`.
    def start_element(_, attributes = [])
      attribute_map = Hash[*attributes.flatten]

      ATTRIBUTES.each do |name|
        value = attribute_map[name]
        characters(value) if value.present?
      end
    end

    # SAX callback for text content: decomposes, strips diacritics and
    # surrounding whitespace, then appends the result padded with spaces.
    def characters(string)
      cleaned = string.unicode_normalize(:nfd).gsub(DIACRITICS, "").strip
      scrubbed << " #{cleaned} "
    end
  end
end
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`# frozen_string_literal: true`
Fix locale dependend stemmer for FTS Fix locale dependend stemmer for FTS to improve search relevance on non English languages. 2013-07-22 19:07:59 -04:00			`require_dependency 'search'`

Remove SearchObserver, aim is to remove all observers rails-observers gem is mostly unmaintained and is a pain to carry forward new implementation contains significantly less magic as a bonus 2016-12-21 21:13:14 -05:00			`class SearchIndexer`

			`def self.disable`
			`@disabled = true`
			`end`

			`def self.enable`
			`@disabled = false`
			`end`
Initial release of Discourse 2013-02-05 14:16:51 -05:00
			`def self.scrub_html_for_search(html)`
			`HtmlScrubber.scrub(html)`
			`end`

FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`def self.inject_extra_terms(raw)`
shuffle code around so excerpt is not messed up 2016-07-25 03:12:01 -04:00			`# insert some extra words for I.am.a.word so "word" is tokenized`
FIX: not splitting words correctly for search tokenizer 2017-08-14 16:19:28 -04:00			`# I.am.a.word becomes I.am.a.word am a word`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`raw.gsub(/[^[:space:]][\.]+[^[:space:]]/) do \|with_dot\|`
shuffle code around so excerpt is not messed up 2016-07-25 03:12:01 -04:00			`split = with_dot.split(".")`
			`if split.length > 1`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`with_dot + ((+" ") << split[1..-1].join(" "))`
shuffle code around so excerpt is not messed up 2016-07-25 03:12:01 -04:00			`else`
			`with_dot`
			`end`
			`end`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`end`

			`def self.update_index(table: , id: , raw_data:)`
			`search_data = raw_data.map do \|data\|`
			`inject_extra_terms(Search.prepare_data(data \|\| "", :index))`
			`end`

			`table_name = "#{table}_search_data"`
			`foreign_key = "#{table}_id"`
shuffle code around so excerpt is not messed up 2016-07-25 03:12:01 -04:00
Fix locale dependend stemmer for FTS Fix locale dependend stemmer for FTS to improve search relevance on non English languages. 2013-07-22 19:07:59 -04:00			`# for user login and name use "simple" lowercase stemmer`
Replace rmmseg gem for cppjieba_rb since better dictionary (#5006) * Rename locale to ts config in search module to make it clear * Replace rmmese-cpp for cppjieba_rb 2017-07-31 15:28:48 -04:00			`stemmer = table == "user" ? "simple" : Search.ts_config`
Fix locale dependend stemmer for FTS Fix locale dependend stemmer for FTS to improve search relevance on non English languages. 2013-07-22 19:07:59 -04:00
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`ranked_index = <<~SQL`
			`setweight(to_tsvector('#{stemmer}', coalesce(:a,'')), 'A') \|\|`
			`setweight(to_tsvector('#{stemmer}', coalesce(:b,'')), 'B') \|\|`
			`setweight(to_tsvector('#{stemmer}', coalesce(:c,'')), 'C') \|\|`
			`setweight(to_tsvector('#{stemmer}', coalesce(:d,'')), 'D')`
			`SQL`

			`indexed_data = search_data.select { \|d\| d.length > 0 }.join(' ')`

			`params = {`
			`a: search_data[0],`
			`b: search_data[1],`
			`c: search_data[2],`
			`d: search_data[3],`
			`raw_data: indexed_data,`
			`id: id,`
			`locale: SiteSetting.default_locale,`
			`version: Search::INDEX_VERSION`
			`}`

Search Refactor: Remove some manual SQL, make search data tables more idomatic Rails/AR 2013-05-22 15:33:33 -04:00			`# Would be nice to use AR here but not sure how to execut Postgres functions`
			`# when inserting data like this.`
DEV: remove exec_sql and replace with mini_sql Introduce new patterns for direct sql that are safe and fast. MiniSql is not prone to memory bloat that can happen with direct PG usage. It also has an extremely fast materializer and very a convenient API - DB.exec(sql, params) => runs sql returns row count - DB.query(sql, params) => runs sql returns usable objects (not a hash) - DB.query_hash(sql, params) => runs sql returns an array of hashes - DB.query_single(sql, params) => runs sql and returns a flat one dimensional array - DB.build(sql) => returns a sql builder See more at: https://github.com/discourse/mini_sql 2018-06-19 02:13:14 -04:00			`rows = DB.exec(<<~SQL, params)`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`UPDATE #{table_name}`
			`SET`
			`raw_data = :raw_data,`
			`locale = :locale,`
			`search_data = #{ranked_index},`
			`version = :version`
			`WHERE #{foreign_key} = :id`
			`SQL`

Search Refactor: Remove some manual SQL, make search data tables more idomatic Rails/AR 2013-05-22 15:33:33 -04:00			`if rows == 0`
DEV: remove exec_sql and replace with mini_sql Introduce new patterns for direct sql that are safe and fast. MiniSql is not prone to memory bloat that can happen with direct PG usage. It also has an extremely fast materializer and very a convenient API - DB.exec(sql, params) => runs sql returns row count - DB.query(sql, params) => runs sql returns usable objects (not a hash) - DB.query_hash(sql, params) => runs sql returns an array of hashes - DB.query_single(sql, params) => runs sql and returns a flat one dimensional array - DB.build(sql) => returns a sql builder See more at: https://github.com/discourse/mini_sql 2018-06-19 02:13:14 -04:00			`DB.exec(<<~SQL, params)`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`INSERT INTO #{table_name}`
			`(#{foreign_key}, search_data, locale, raw_data, version)`
			`VALUES (:id, #{ranked_index}, :locale, :raw_data, :version)`
			`SQL`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`
Search Refactor: Remove some manual SQL, make search data tables more idomatic Rails/AR 2013-05-22 15:33:33 -04:00			`rescue`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`# TODO is there any way we can safely avoid this?`
			`# best way is probably pushing search indexer into a dedicated process so it no longer happens on save`
			`# instead in the post processor`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`

PERF: new table used for title similarity search 2014-08-08 01:50:26 -04:00			`def self.update_topics_index(topic_id, title, cooked)`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`scrubbed_cooked = scrub_html_for_search(cooked)[0...Topic::MAX_SIMILAR_BODY_LENGTH]`

			`# a bit inconsitent that we use title as A and body as B when in`
			`# the post index body is C`
			`update_index(table: 'topic', id: topic_id, raw_data: [title, scrubbed_cooked])`
PERF: new table used for title similarity search 2014-08-08 01:50:26 -04:00			`end`

FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`def self.update_posts_index(post_id, title, category, tags, cooked)`
			`update_index(table: 'post', id: post_id, raw_data: [title, category, tags, scrub_html_for_search(cooked)])`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`

			`def self.update_users_index(user_id, username, name)`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`update_index(table: 'user', id: user_id, raw_data: [username, name])`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`
Fix all the trailing whitespace 2013-02-07 10:45:24 -05:00
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`def self.update_categories_index(category_id, name)`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`update_index(table: 'category', id: category_id, raw_data: [name])`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`

FEATURE: show tags in search results 2017-08-25 11:52:18 -04:00			`def self.update_tags_index(tag_id, name)`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`update_index(table: 'tag', id: tag_id, raw_data: [name])`
			`end`

			`def self.queue_post_reindex(topic_id)`
			`return if @disabled`

DEV: remove exec_sql and replace with mini_sql Introduce new patterns for direct sql that are safe and fast. MiniSql is not prone to memory bloat that can happen with direct PG usage. It also has an extremely fast materializer and very a convenient API - DB.exec(sql, params) => runs sql returns row count - DB.query(sql, params) => runs sql returns usable objects (not a hash) - DB.query_hash(sql, params) => runs sql returns an array of hashes - DB.query_single(sql, params) => runs sql and returns a flat one dimensional array - DB.build(sql) => returns a sql builder See more at: https://github.com/discourse/mini_sql 2018-06-19 02:13:14 -04:00			`DB.exec(<<~SQL, topic_id: topic_id)`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`UPDATE post_search_data`
			`SET version = 0`
			`WHERE post_id IN (SELECT id FROM posts WHERE topic_id = :topic_id)`
			`SQL`
FEATURE: show tags in search results 2017-08-25 11:52:18 -04:00			`end`

FIX: rebuild index when engine replaced (#5021) 2017-08-16 07:38:34 -04:00			`def self.index(obj, force: false)`
Remove SearchObserver, aim is to remove all observers rails-observers gem is mostly unmaintained and is a pain to carry forward new implementation contains significantly less magic as a bonus 2016-12-21 21:13:14 -05:00			`return if @disabled`

FIX: remove diacritics when tokenizing html for search 2018-08-23 11:13:52 -04:00			`category_name = nil`
			`tag_names = nil`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`topic = nil`

			`if Topic === obj`
			`topic = obj`
			`elsif Post === obj`
			`topic = obj.topic`
			`end`

			`category_name = topic.category&.name if topic`
			`tag_names = topic.tags.pluck(:name).join(' ') if topic`

			`if Post === obj && (obj.saved_change_to_cooked? \|\| force)`
			`if topic`
			`SearchIndexer.update_posts_index(obj.id, topic.title, category_name, tag_names, obj.cooked)`
			`SearchIndexer.update_topics_index(topic.id, topic.title, obj.cooked) if obj.is_first_post?`
More logging, less problems 2014-05-06 22:35:26 -04:00			`else`
Remove SearchObserver, aim is to remove all observers rails-observers gem is mostly unmaintained and is a pain to carry forward new implementation contains significantly less magic as a bonus 2016-12-21 21:13:14 -05:00			`Rails.logger.warn("Orphan post skipped in search_indexer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}")`
More logging, less problems 2014-05-06 22:35:26 -04:00			`end`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`
FIX: rebuild index when engine replaced (#5021) 2017-08-16 07:38:34 -04:00
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`if User === obj && (obj.saved_change_to_username? \|\| obj.saved_change_to_name? \|\| force)`
Remove SearchObserver, aim is to remove all observers rails-observers gem is mostly unmaintained and is a pain to carry forward new implementation contains significantly less magic as a bonus 2016-12-21 21:13:14 -05:00			`SearchIndexer.update_users_index(obj.id, obj.username_lower \|\| '', obj.name ? obj.name.downcase : '')`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`

FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`if Topic === obj && (obj.saved_change_to_title? \|\| force)`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`if obj.posts`
FIX: remove diacritics when tokenizing html for search 2018-08-23 11:13:52 -04:00			`if post = obj.posts.find_by(post_number: 1)`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`SearchIndexer.update_posts_index(post.id, obj.title, category_name, tag_names, post.cooked)`
Remove SearchObserver, aim is to remove all observers rails-observers gem is mostly unmaintained and is a pain to carry forward new implementation contains significantly less magic as a bonus 2016-12-21 21:13:14 -05:00			`SearchIndexer.update_topics_index(obj.id, obj.title, post.cooked)`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`
			`end`
			`end`

FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`if Category === obj && (obj.saved_change_to_name? \|\| force)`
Remove SearchObserver, aim is to remove all observers rails-observers gem is mostly unmaintained and is a pain to carry forward new implementation contains significantly less magic as a bonus 2016-12-21 21:13:14 -05:00			`SearchIndexer.update_categories_index(obj.id, obj.name)`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`
FEATURE: show tags in search results 2017-08-25 11:52:18 -04:00
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`if Tag === obj && (obj.saved_change_to_name? \|\| force)`
FEATURE: show tags in search results 2017-08-25 11:52:18 -04:00			`SearchIndexer.update_tags_index(obj.id, obj.name)`
			`end`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`

			`class HtmlScrubber < Nokogiri::XML::SAX::Document`
			`attr_reader :scrubbed`

			`def initialize`
FEATURE: search within title using in:title Also - Significantly improved search ranking, title is treated most strongly - Adds tag names to the index - Run search re-indexer more aggressively - Re-index topic and all posts on category change 2018-02-19 22:41:00 -05:00			`@scrubbed = +""`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`

			`def self.scrub(html)`
Return immediately when there's no html to scrub 2018-08-23 12:00:07 -04:00			`return +"" if html.blank?`

minor cleanup, using AR querying DSL over raw SQL in some places 2013-02-28 13:54:12 -05:00			`me = new`
Return immediately when there's no html to scrub 2018-08-23 12:00:07 -04:00			`Nokogiri::HTML::SAX::Parser.new(me).parse("<div>#{html}</div>")`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`me.scrubbed`
			`end`

FIX: remove diacritics when tokenizing html for search 2018-08-23 11:13:52 -04:00			`ATTRIBUTES \|\|= %w{alt title href data-youtube-title}`

			`def start_element(_, attributes = [])`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`attributes = Hash[*attributes.flatten]`
FIX: remove diacritics when tokenizing html for search 2018-08-23 11:13:52 -04:00
			`ATTRIBUTES.each do \|name\|`
			`characters(attributes[name]) if attributes[name].present?`
FEATURE: Make links indexable. (#6285) 2018-08-19 20:39:19 -04:00			`end`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`

FIX: remove diacritics instead of transliterating 2018-08-23 18:38:44 -04:00			`DIACRITICS \|\|= /([\u0300-\u036f]\|[\u1AB0-\u1AFF]\|[\u1DC0-\u1DFF]\|[\u20D0-\u20FF])/`

Initial release of Discourse 2013-02-05 14:16:51 -05:00			`def characters(string)`
FIX: remove diacritics instead of transliterating 2018-08-23 18:38:44 -04:00			`scrubbed << " #{string.unicode_normalize(:nfd).gsub(DIACRITICS, "").strip} "`
Initial release of Discourse 2013-02-05 14:16:51 -05:00			`end`
			`end`
			`end`