discourse/script/import_scripts/discuz_x.rb

# encoding: utf-8
#
# Author: Erick Guan <fantasticfears@gmail.com>
#
# This script imports data from the latest Discuz! X.
# It should work with Discuz! X3.x.
# This script has only been tested on Simplified Chinese Discuz! X instances.
# If you want to import data in a language other than Simplified Chinese, email me.

require 'php_serialize'
require 'miro'
require 'mysql2'
require File.expand_path(File.dirname(__FILE__) + "/base.rb")

class ImportScripts::DiscuzX < ImportScripts::Base

  # Name of the MySQL database that holds the Discuz! X data.
  DISCUZX_DB = "ultrax"
  # Prefix prepended to every Discuz! X table name (see table_name).
  DB_TABLE_PREFIX = 'pre_'
  # Number of rows fetched per paged query.
  BATCH_SIZE = 1000
  # Host and path of the old forum; used to recognise links back to the old site.
  ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s)://
  # Base URL of the new Discourse site; prepended to rewritten links.
  NEW_SITE_PREFIX      = "http://discourse.example.com"  # with http:// or https://

  # Set DISCUZX_BASE_DIR to the base directory of your discuz installation.
  DISCUZX_BASE_DIR      = '/var/www/discuz/upload'
  # Avatar and forum-attachment directories, joined onto DISCUZX_BASE_DIR.
  AVATAR_DIR            = '/uc_server/data/avatar'
  ATTACHMENT_DIR        = '/data/attachment/forum'
  # File extensions written to SiteSetting.authorized_extensions by import_attachments.
  AUTHORIZED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'zip', 'rar', 'pdf']

  # Opens the MySQL connection to the Discuz! X database and precompiles the
  # patterns used to recognise links that point back at the old forum.
  def initialize
    super

    @first_post_id_by_topic_id = {}
    @client = Mysql2::Client.new(
      host: "localhost",
      username: "root",
      #password: "password",
      database: DISCUZX_DB
    )

    # Old-site prefix with its dots escaped so they only match literal dots.
    site = ORIGINAL_SITE_PREFIX.gsub('.', '\.')

    # Path/query part of every recognised old-forum URL. The named groups
    # (tid, pid, fid, action, mod) are read back in process_discuzx_post.
    # NOTE(review): in the first two tails the greedy `[^\[\]\s]*` in front of
    # the optional pid group looks like it always swallows the pid — confirm.
    link_tails = [
      /forum\.php\?mod=viewthread(?:&|&amp;)tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)/,
      /viewthread\.php\?tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)/,
      /forum\.php\?mod=redirect(?:&|&amp;)goto=findpost(?:&|&amp;)pid=(?<pid>\d+)(?:&|&amp;)ptid=(?<tid>\d+)(?:[^\[\]\s]*)/,
      /redirect\.php\?goto=findpost(?:&|&amp;)pid=(?<pid>\d+)(?:&|&amp;)ptid=(?<tid>\d+)(?:[^\[\]\s]*)/,
      /forumdisplay\.php\?fid=(?<fid>\d+)(?:[^\[\]\s]*)/,
      /forum\.php\?mod=forumdisplay(?:&|&amp;)fid=(?<fid>\d+)(?:[^\[\]\s]*)/,
      /(?<action>index)\.php(?:[^\[\]\s]*)/,
      /(?<action>stats)\.php(?:[^\[\]\s]*)/,
      /misc.php\?mod=(?<mod>stat|ranklist)(?:[^\[\]\s]*)/
    ]

    @internal_url_regexps = link_tails.map do |tail|
      /http(?:s)?:\/\/#{site}\/#{tail.source}/
    end

  end

  # Runs every import step, strictly in the order listed, and returns whatever
  # the last step returns.
  def execute
    steps = %i[
      get_knowledge_about_duplicated_email
      import_users
      import_categories
      import_posts
      import_private_messages
      import_attachments
    ]

    steps.inject(nil) { |_previous, step| send(step) }
  end

  # add the prefix to the table name
  #
  # @param name [String, Symbol, nil] bare table name without the prefix
  # @return [String] DB_TABLE_PREFIX followed by +name+; just the prefix when
  #   +name+ is nil (the declared default), instead of raising a TypeError
  def table_name(name = nil)
    "#{DB_TABLE_PREFIX}#{name}"
  end

  # find which group members can be granted as admin
  #
  # Reads every row of the usergroup table and fills @admin_group_id and
  # @moderator_group_id with the group ids whose radminid marks them as staff.
  def get_knowledge_about_group
    usergroup_table = table_name 'common_usergroup'
    groups = mysql_query(
      "SELECT groupid group_id, radminid role_id
             FROM #{usergroup_table};")
    @moderator_group_id = []
    @admin_group_id = []
    #@banned_group_id = [4,5] # Banned users and their posts are not imported; empty this array if you want to import them.

    groups.each do |row|
      role = row['role_id']
      if role == 1 # administrator
        @admin_group_id << row['group_id']
      elsif role == 2 || role == 3 # super moderator, moderator. Drop 3 if plain moderators should not become Discourse moderators.
        @moderator_group_id << row['group_id']
      end
    end
  end

  # Loads the serialized 'forumkeys' setting and stores every non-blank entry
  # in @category_slug, keyed by the key found in the unserialized value.
  # @category_slug stays empty when the setting is missing or blank.
  def get_knowledge_about_category_slug
    @category_slug = {}
    rows = mysql_query("SELECT svalue value
      FROM #{table_name 'common_setting'}
      WHERE skey = 'forumkeys'")

    serialized = rows.size >= 1 ? rows.first['value'] : nil
    return if serialized.blank?

    PHP.unserialize(serialized).each do |import_id, slug|
      @category_slug[import_id] = slug unless slug.blank?
    end
  end

  # Finds members that share an e-mail address with another member and records
  # them in @duplicated_email as { duplicate uid => lowest uid with that e-mail }.
  # If the kept uid already has an entry in the lookup's @users table, the
  # duplicate uid is pointed at the same entry.
  def get_knowledge_about_duplicated_email
    @duplicated_email = {}

    # Use the configured prefix like every other query in this file, and pick
    # the kept uid with MIN() so the result is deterministic and valid under
    # ONLY_FULL_GROUP_BY.
    member_table = table_name 'common_member'
    results = mysql_query(
      "SELECT a.uid uid, b.uid import_id
         FROM #{member_table} a
         JOIN (SELECT MIN(uid) uid, email
                 FROM #{member_table}
                GROUP BY email
               HAVING COUNT(email) > 1) b USING(email)
        WHERE a.uid != b.uid")

    users = @lookup.instance_variable_get :@users

    results.each do |row|
      @duplicated_email[row['uid']] = row['import_id']
      user_id = users[row['import_id']]
      users[row['uid']] = user_id if user_id
    end
  end

  # Imports all Discuz! X members in batches of BATCH_SIZE.
  #
  # Each member row is joined with its UCenter, profile, status, forum-field
  # and home-field rows and mapped to the hash passed to create_users. After a
  # user is created, the post_create_action lambda uploads the avatar and the
  # profile/card backgrounds taken from the member's space CSS.
  def import_users
    puts '', "creating users"

    get_knowledge_about_group

    sensitive_user_table = table_name 'ucenter_members'
    user_table = table_name 'common_member'
    profile_table = table_name 'common_member_profile'
    status_table = table_name 'common_member_status'
    forum_table = table_name 'common_member_field_forum'
    home_table = table_name 'common_member_field_home'
    total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first['count']

    batches(BATCH_SIZE) do |offset|
      # birthprovince is read from p.birthprovince; it used to be aliased from
      # p.resideprovince by mistake.
      results = mysql_query(
        "SELECT u.uid id, u.username username, u.email email, u.groupid group_id,
                    su.regdate regdate, su.password password_hash, su.salt salt,
                    s.regip regip, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at, s.lastsendmail last_emailed_at,
                    u.emailstatus email_confirmed, u.avatarstatus avatar_exists,
                    p.site website, p.address address, p.bio bio, p.realname realname, p.qq qq,
                    p.resideprovince resideprovince, p.residecity residecity, p.residedist residedist, p.residecommunity residecommunity,
                    p.birthprovince birthprovince, p.birthcity birthcity, p.birthdist birthdist, p.birthcommunity birthcommunity,
                    h.spacecss spacecss, h.spacenote spacenote,
                    f.customstatus customstatus, f.sightml sightml
               FROM #{user_table} u
               LEFT JOIN #{sensitive_user_table} su USING(uid)
               LEFT JOIN #{profile_table} p USING(uid)
               LEFT JOIN #{status_table} s USING(uid)
               LEFT JOIN #{forum_table} f USING(uid)
               LEFT JOIN #{home_table} h USING(uid)
              ORDER BY u.uid ASC
              LIMIT #{BATCH_SIZE}
             OFFSET #{offset};")

      break if results.size < 1

      # TODO: breaks the script, as reported by some users
      # next if all_records_exist? :users, users.map {|u| u["id"].to_i}

      create_users(results, total: total_count, offset: offset) do |user|
        # Website: the profile site if it looks like a host name, otherwise a
        # Qzone link built from a purely numeric QQ number above 10000.
        # (The old check compared a String with an Integer and never matched.)
        qq = user['qq'].to_s.strip
        website =
          if user['website'] && user['website'].include?('.')
            user['website'].strip
          elsif qq == qq.to_i.to_s && qq.to_i > 10000
            'http://user.qzone.qq.com/' + qq
          end

        # Location: explicit address first, then the residence parts, falling
        # back to the birth-place parts when no residence province is set.
        region_parts =
          if !user['resideprovince'].blank?
            [user['resideprovince'], user['residecity'], user['residedist'], user['residecommunity']]
          else
            [user['birthprovince'], user['birthcity'], user['birthdist'], user['birthcommunity']]
          end
        region = region_parts.reject { |part| part.blank? }.join(' ')

        { id: user['id'],
          email: user['email'],
          username: user['username'],
          name: first_exists(user['realname'], user['customstatus'], user['username']),
          import_pass: user['password_hash'],
          active: true,
          salt: user['salt'],
          # TODO: title: user['customstatus'], # move custom title to name since discourse can't let user custom title https://meta.discourse.org/t/let-users-custom-their-title/37626
          created_at: user['regdate'] ? Time.zone.at(user['regdate']) : nil,
          registration_ip_address: user['regip'],
          ip_address: user['last_visit_ip'],
          last_seen_at: user['last_visit_time'],
          last_emailed_at: user['last_emailed_at'],
          last_posted_at: user['last_posted_at'],
          moderator: @moderator_group_id.include?(user['group_id']),
          admin: @admin_group_id.include?(user['group_id']),
          website: website,
          bio_raw: first_exists((user['bio'] && CGI.unescapeHTML(user['bio'])), user['sightml'], user['spacenote']).strip[0, 3000],
          location: first_exists(user['address'], region),
          post_create_action: lambda do |newmember|
            if user['avatar_exists'] == 1 && newmember.uploaded_avatar_id.blank?
              path, filename = discuzx_avatar_fullpath(user['id'])
              if path
                begin
                  upload = create_upload(newmember.id, path, filename)
                  if !upload.nil? && upload.persisted?
                    newmember.import_mode = false
                    newmember.create_user_avatar
                    newmember.import_mode = true
                    newmember.user_avatar.update(custom_upload_id: upload.id)
                    newmember.update(uploaded_avatar_id: upload.id)
                  else
                    puts "Error: Upload did not persist!"
                  end
                rescue SystemCallError => err
                  puts "Could not import avatar: #{err.message}"
                end
              end
            end
            if !user['spacecss'].blank? && newmember.user_profile.profile_background.blank?
              # profile background: take the url(...) of selected CSS rules and
              # keep only the part after the old site prefix
              if matched = user['spacecss'].match(/body\s*{[^}]*url\('?(.+?)'?\)/i)
                body_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
              end
              if matched = user['spacecss'].match(/#hd\s*{[^}]*url\('?(.+?)'?\)/i)
                header_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
              end
              # the dot is escaped so it only matches the ".blocktitle" class selector
              if matched = user['spacecss'].match(/\.blocktitle\s*{[^}]*url\('?(.+?)'?\)/i)
                blocktitle_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
              end
              if matched = user['spacecss'].match(/#ct\s*{[^}]*url\('?(.+?)'?\)/i)
                content_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
              end

              if body_background || header_background || blocktitle_background || content_background
                profile_background = first_exists(header_background, body_background, content_background, blocktitle_background)
                card_background = first_exists(content_background, body_background, header_background, blocktitle_background)
                upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, profile_background), File.basename(profile_background))
                if upload
                  newmember.user_profile.upload_profile_background upload
                else
                  puts "WARNING: #{user['username']} (UID: #{user['id']}) profile_background file did not persist!"
                end
                upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, card_background), File.basename(card_background))
                if upload
                  newmember.user_profile.upload_card_background upload
                else
                  puts "WARNING: #{user['username']} (UID: #{user['id']}) card_background file did not persist!"
                end
              end
            end

            # we don't send email to the unconfirmed user
            newmember.update(email_digests: user['email_confirmed'] == 1) if newmember.email_digests
            # a name identical to the username carries no information, so clear it
            newmember.update(name: '') if !newmember.name.blank? && newmember.name == newmember.username
          end
        }
      end
    end
  end

  # Imports Discuz! X forums as Discourse categories.
  #
  # Rows of type 'group' and rows with status 2 are skipped. Positions are
  # offset by the highest existing category position. After a category is
  # created, its slug, "about" post (from the forum rules) and logo are set.
  def import_categories
    puts '', "creating categories"

    get_knowledge_about_category_slug

    forums_table = table_name 'forum_forum'
    forums_data_table = table_name 'forum_forumfield'

    results = mysql_query("
          SELECT f.fid id, f.fup parent_id, f.name, f.type type, f.status status, f.displayorder position,
                 d.description description, d.rules rules, d.icon, d.extra extra
            FROM #{forums_table} f
            LEFT JOIN #{forums_data_table} d USING(fid)
           ORDER BY parent_id ASC, id ASC
        ")

    # Computed once; the per-row recomputation whose result was thrown away
    # has been removed.
    max_position = Category.all.max_by(&:position).position
    create_categories(results) do |row|
      next if row['type'] == 'group' || row['status'] == 2 # or row['status'].to_i == 3 # uncomment to skip importing groups
      extra = PHP.unserialize(row['extra']) if !row['extra'].blank?
      # namecolor is stored with a leading character; keep the six characters after it
      color = extra["namecolor"][1, 6] if extra && !extra["namecolor"].blank?

      h = {
        id: row['id'],
        name: row['name'],
        description: row['description'],
        position: row['position'].to_i + max_position,
        color: color,
        suppress_from_latest: (row['status'] == 0 || row['status'] == 3),
        post_create_action: lambda do |category|
          if slug = @category_slug[row['id']]
            category.update(slug: slug)
          end

          # rules and icon come from a LEFT JOIN and may be NULL
          raw = process_discuzx_post(row['rules'].to_s, nil)
          if @bbcode_to_md
            raw = raw.bbcode_to_md(false) rescue raw
          end
          category.topic.posts.first.update_attribute(:raw, raw)
          if !row['icon'].blank?
            upload = create_upload(Discourse::SYSTEM_USER_ID, File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, '../common', row['icon']), File.basename(row['icon']))
            if upload
              category.uploaded_logo_id = upload.id
              # FIXME: I don't know how to get '/shared' by script. May change to Rails.root
              category.color = Miro::DominantColors.new(File.join('/shared', upload.url)).to_hex.first[1, 6] if !color
              category.save!
            end
          end
          category
        end
      }
      if row['parent_id'].to_i > 0
        h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id'])
      end
      h
    end
  end

  # Imports forum posts in batches of BATCH_SIZE, creating a topic for every
  # first post and a reply for every other post.
  #
  # Poll topics (special == 1) get a [poll] block prepended to the first post.
  # Posts whose invisible flag has bit 0 set, or whose body is blank after
  # conversion, are deleted right after creation; posts with bit 1 set (waiting
  # for approval) are flagged to the system user instead.
  def import_posts
    puts "", "creating topics and posts"

    users_table = table_name 'common_member'
    posts_table = table_name 'forum_post'
    topics_table = table_name 'forum_thread'

    total_count = mysql_query("SELECT count(*) count FROM #{posts_table}").first['count']

    batches(BATCH_SIZE) do |offset|
      # The former hard-coded `t.tid < 10000` filter was removed: it silently
      # dropped every topic above that id while total_count counted all posts.
      results = mysql_query("
            SELECT p.pid id,
                   p.tid topic_id,
                   t.fid category_id,
                   t.subject title,
                   p.authorid user_id,
                   p.message raw,
                   p.dateline post_time,
                   p2.pid first_id,
                   p.invisible status,
                   t.special special
              FROM #{posts_table} p
              JOIN #{posts_table} p2 ON p2.first AND p2.tid = p.tid
              JOIN #{topics_table} t ON t.tid = p.tid
             ORDER BY id ASC, topic_id ASC
             LIMIT #{BATCH_SIZE}
            OFFSET #{offset};
          ")
      # u.status != -1 AND u.groupid != 4 AND u.groupid != 5 would restrict to users that are not
      # locked, banned from visiting or muted. On real Discuz forums banned users are usually
      # spam bots or expelled users, which do not need to be imported.
      break if results.size < 1

      next if all_records_exist? :posts, results.map { |p| p["id"].to_i }

      create_posts(results, total: total_count, offset: offset) do |m|
        skip = false
        mapped = {}

        mapped[:id] = m['id']
        mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
        mapped[:raw] = process_discuzx_post(m['raw'], m['id'])
        mapped[:created_at] = Time.zone.at(m['post_time'])
        # the query above selects no tags column, so this is always nil
        mapped[:tags] = m['tags']

        if m['id'] == m['first_id']
          mapped[:category] = category_id_from_imported_category_id(m['category_id'])
          mapped[:title] = CGI.unescapeHTML(m['title'])

          if m['special'] == 1
            # Separate locals: the batch variable `results` must not be
            # reassigned from inside the block that iterates over it.
            poll = mysql_query("
              SELECT multiple, maxchoices
              FROM #{table_name 'forum_poll'}
              WHERE tid = #{m['topic_id']}").first || {}
            poll_options = mysql_query("
              SELECT polloption
              FROM #{table_name 'forum_polloption'}
              WHERE tid = #{m['topic_id']}
              ORDER BY displayorder")
            # size is the emptiness check used for query results elsewhere in this file
            if poll_options.size < 1
              puts "WARNING: can't find poll options for topic #{m['topic_id']}, skip poll"
            else
              # 0 is truthy in Ruby, so compare numerically; to_i also covers
              # a missing poll row (nil values).
              multiple = poll['multiple'].to_i > 0 ? ' type=multiple' : ''
              max_choices = poll['maxchoices'].to_i > 0 ? " max=#{poll['maxchoices']}" : ''
              option_lines = poll_options.map { |option| '- ' + option['polloption'] }.join("\n")
              mapped[:raw].prepend "[poll#{multiple}#{max_choices}]\n#{option_lines}\n[/poll]\n"
            end
          end
        else
          parent = topic_lookup_from_imported_post_id(m['first_id'])

          if parent
            mapped[:topic_id] = parent[:topic_id]
            reply_post_import_id = find_post_id_by_quote_number(m['raw'])
            if reply_post_import_id
              post_id = post_id_from_imported_post_id(reply_post_import_id.to_i)
              if (post = Post.find_by(id: post_id))
                if post.topic_id == mapped[:topic_id]
                  mapped[:reply_to_post_number] = post.post_number
                else
                  puts "post #{m['id']} reply to another topic, skip reply"
                end
              else
                puts "post #{m['id']} reply to not exists post #{reply_post_import_id}, skip reply"
              end
            end
          else
            puts "Parent topic #{m['topic_id']} doesn't exist. Skipping #{m['id']}: #{m['title'][0..40]}"
            skip = true
          end

        end

        if m['status'] & 1 == 1 || mapped[:raw].blank?
          mapped[:post_create_action] = lambda do |action_post|
            PostDestroyer.new(Discourse.system_user, action_post).perform_delete
          end
        elsif (m['status'] & 2) >> 1 == 1 # waiting for approve
          mapped[:post_create_action] = lambda do |action_post|
            PostActionCreator.notify_user(Discourse.system_user, action_post)
          end
        end
        skip ? nil : mapped
      end
    end
  end

  # Imports topic favourites (idtype 'tid') as bookmarks on the first post of
  # the favourited topic, in batches of BATCH_SIZE.
  def import_bookmarks
    puts '', 'creating bookmarks'
    favorites_table = table_name 'home_favorite'
    posts_table = table_name 'forum_post'

    total_count = mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first['count']
    batches(BATCH_SIZE) do |offset|
      # LIMIT/OFFSET paging needs a stable ORDER BY; without it a batch may
      # repeat or miss rows.
      results = mysql_query("
        SELECT p.pid post_id, f.uid user_id
          FROM #{favorites_table} f
          JOIN #{posts_table} p ON f.id = p.tid
          WHERE f.idtype = 'tid' AND p.first
          ORDER BY p.pid ASC, f.uid ASC
             LIMIT #{BATCH_SIZE}
            OFFSET #{offset};")

      break if results.size < 1

      # next if all_records_exist?

      create_bookmarks(results, total: total_count, offset: offset) do |row|
        {
          user_id: row['user_id'],
          post_id: row['post_id']
        }
      end
    end
  end

  # Imports UCenter private messages as Discourse private-message topics.
  #
  # Messages are read from the sharded pm_messages_1 .. pm_messages_9 tables,
  # ordered by thread and message id. The first message of a thread creates the
  # topic (title taken from pm_lists, recipients from pm_members); every later
  # message is attached to that topic through @first_post_id_by_topic_id.
  def import_private_messages
    puts '', 'creating private messages'

    pm_indexes = table_name 'ucenter_pm_indexes'
    pm_messages = table_name 'ucenter_pm_messages'
    total_count = mysql_query("SELECT count(*) count FROM #{pm_indexes}").first['count']

    # One SELECT per shard table, combined with UNION.
    # NOTE(review): only shards 1-9 are read, as before; confirm whether a
    # `_0` shard exists in the source database.
    message_columns = 'pmid id, plid thread_id, authorid user_id, message, dateline created_at'
    union_sql = (1..9).map do |shard|
      "SELECT #{message_columns} FROM #{pm_messages}_#{shard}"
    end.join("\n      UNION ")

    batches(BATCH_SIZE) do |offset|
      results = mysql_query("
            #{union_sql}
          ORDER BY thread_id ASC, id ASC
             LIMIT #{BATCH_SIZE}
            OFFSET #{offset};")

      break if results.size < 1

      # next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}

      create_posts(results, total: total_count, offset: offset) do |m|
        skip = false
        mapped = {}

        mapped[:id] = "pm:#{m['id']}"
        mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
        mapped[:raw] = process_discuzx_post(m['message'], m['id'])
        mapped[:created_at] = Time.zone.at(m['created_at'])
        thread_id = "pm_#{m['thread_id']}"

        if is_first_pm(m['id'], m['thread_id'])
          # find the title from list table
          pm_thread = mysql_query("
                SELECT plid thread_id, subject
                  FROM #{table_name 'ucenter_pm_lists'}
                 WHERE plid = #{m['thread_id']};").first

          if pm_thread.nil?
            # No list row means no title; skip instead of crashing on nil.
            skip = true
            puts "Skipping pm:#{m['id']} due to missing thread #{m['thread_id']}"
          else
            mapped[:title] = pm_thread['subject']
            mapped[:archetype] = Archetype.private_message

            # Find the users who are part of this private message.
            import_user_ids = mysql_query("
                  SELECT plid thread_id, uid user_id
                    FROM #{table_name 'ucenter_pm_members'}
                   WHERE plid = #{m['thread_id']};
                ").map { |r| r['user_id'] }.uniq

            # Everyone in the thread except the author becomes a recipient.
            mapped[:target_usernames] = import_user_ids.map! do |import_user_id|
              import_user_id.to_s == m['user_id'].to_s ? nil : User.find_by(id: user_id_from_imported_user_id(import_user_id)).try(:username)
            end.compact

            if mapped[:target_usernames].empty? # pm with yourself?
              skip = true
              puts "Skipping pm:#{m['id']} due to no target"
            else
              @first_post_id_by_topic_id[thread_id] = mapped[:id]
            end
          end
        else
          parent = topic_lookup_from_imported_post_id(@first_post_id_by_topic_id[thread_id])
          if parent
            mapped[:topic_id] = parent[:topic_id]
          else
            puts "Parent post pm thread:#{thread_id} doesn't exist. Skipping #{m["id"]}: #{m["message"][0..40]}"
            skip = true
          end
        end

        skip ? nil : mapped
      end

    end
  end

  # search for first pm id for the series of pm
  #
  # @param pm_id [Integer, String] id of the message being imported
  # @param thread_id [Integer, String] id (plid) of the thread it belongs to
  # @return [Boolean] true when +pm_id+ is the lowest message id indexed for
  #   the thread; false when it is not, or when the thread has no index rows
  def is_first_pm(pm_id, thread_id)
    # Only the first row is needed, so let the database stop after one.
    first_row = mysql_query("
          SELECT pmid id
            FROM #{table_name 'ucenter_pm_indexes'}
           WHERE plid = #{thread_id.to_i}
        ORDER BY id
           LIMIT 1").first

    !first_row.nil? && first_row['id'].to_s == pm_id.to_s
  end

  # Replaces every [img]...[/img] whose content can be uploaded as an inline
  # image with the HTML for that upload. Tags whose upload fails are removed.
  #
  # @param raw [String] post body
  # @return [String] a new string; +raw+ itself is not modified. The string is
  #   returned even when nothing matched (the previous gsub! returned nil then).
  def process_and_upload_inline_images(raw)
    inline_image_regex = /\[img\]([\s\S]*?)\[\/img\]/

    raw.gsub(inline_image_regex) do
      # Read the capture before calling helpers that may run their own matches.
      data = Regexp.last_match(1)

      upload, filename = upload_inline_image data
      upload ? html_for_upload(upload, filename) : nil
    end
  end

  # Converts a Discuz! X post body (BBCode) into the text stored in Discourse.
  #
  # Reply headers are stripped, quotes are converted, unsupported tags are
  # removed or replaced, and links to the old forum are rewritten through
  # replace_internal_link. Old-site links that could not be rewritten are
  # reported with a warning.
  #
  # @param raw [String, nil] original post body; nil is treated as empty
  # @param import_id [Object] id of the post, used only in log messages
  # @return [String] converted body with surrounding whitespace stripped
  def process_discuzx_post(raw, import_id)
    # raw = process_and_upload_inline_images(raw)
    s = raw.to_s.dup
    # Escaped once so the dots of the host name only match literal dots.
    site = Regexp.escape(ORIGINAL_SITE_PREFIX)

    # Strip the quote
    # [quote] quotation includes the topic which is the same as reply to in Discourse
    # We get the pid to find the post number the post reply to. So it can be stripped
    s = s.gsub(/\[b\]回复 \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[\/url\]\[\/b\]/i, '').strip
    s = s.gsub(/\[b\]回复 \[url=https?:\/\/#{site}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\].*?\[\/b\]/i, '').strip

    # Non-greedy so that each [quote] block is handled on its own.
    s.gsub!(/\[quote\](.*?)\[\/quote\]/im) do |matched|
      content = Regexp.last_match(1)
      post_import_id = find_post_id_by_quote_number(content)
      if post_import_id
        post_id = post_id_from_imported_post_id(post_import_id.to_i)
        if (post = Post.find_by(id: post_id))
          "[quote=\"#{post.user.username}\", post: #{post.post_number}, topic: #{post.topic_id}]\n#{content}\n[/quote]"
        else
          puts "post #{import_id} quote to not exists post #{post_import_id}, skip reply"
          # keep the whole quote; `matched` is a String, so matched[0] was just "["
          matched
        end
      else
        matched
      end
    end

    s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')
    s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=https?:\/\/#{site}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')

    # convert quote
    s.gsub!(/\[quote\](.*?)\[\/quote\]/m) { "\n" + (Regexp.last_match(1).strip).gsub(/^/, '> ') + "\n" }

    # truncate line space, preventing line starting with many blanks to be parsed as code blocks
    s.gsub!(/^ {4,}/, '   ')

    # TODO: Much better to use bbcode-to-md gem
    # Convert image bbcode with width and height
    # (captures are non-greedy so several images on one line stay separate)
    s.gsub!(/\[img[^\]]*\]https?:\/\/#{site}\/(.*?)\[\/img\]/i, '[x-attach]\1[/x-attach]') # dont convert attachment
    s.gsub!(/<img[^>]*src="https?:\/\/#{site}\/([^"]*)".*?>/i, '[x-attach]\1[/x-attach]') # dont convert attachment
    s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/blog\/(.*?)\[\/img\]/i, '[x-attach]../blog/\1[/x-attach]') # site-specific customization
    s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/ucenter\/avatar.php\?uid=(\d+)[^\]]*\[\/img\]/i) { "[x-attach]#{discuzx_avatar_fullpath(Regexp.last_match(1), false)[0]}[/x-attach]" } # site-specific customization
    s.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')
    s.gsub!(/\[img\]([^\]]*)\[\/img\]/i, '<img src="\1">')

    s.gsub!(/\[qq\]([^\]]*)\[\/qq\]/i, '<a href="http://wpa.qq.com/msgrd?V=3&Uin=\1&Site=[Discuz!]&from=discuz&Menu=yes" target="_blank"><!--<img src="static/image/common/qq_big.gif" border="0">-->QQ 交谈</a>')

    s.gsub!(/\[email\]([^\]]*)\[\/email\]/i, '[url=mailto:\1]\1[/url]') # bbcode-to-md can convert it
    s.gsub!(/\[s\]([^\]]*)\[\/s\]/i, '<s>\1</s>')
    s.gsub!(/\[sup\]([^\]]*)\[\/sup\]/i, '<sup>\1</sup>')
    s.gsub!(/\[sub\]([^\]]*)\[\/sub\]/i, '<sub>\1</sub>')
    s.gsub!(/\[hr\]/i, "\n---\n")

    # remove the media tag
    s.gsub!(/\[\/?media[^\]]*\]/i, "\n")
    s.gsub!(/\[\/?flash[^\]]*\]/i, "\n")
    s.gsub!(/\[\/?audio[^\]]*\]/i, "\n")
    s.gsub!(/\[\/?video[^\]]*\]/i, "\n")

    # Remove the font, p and backcolor tag
    # Discourse doesn't support the font tag
    s.gsub!(/\[font=[^\]]*?\]/i, '')
    s.gsub!(/\[\/font\]/i, '')
    s.gsub!(/\[p=[^\]]*?\]/i, '')
    s.gsub!(/\[\/p\]/i, '')
    s.gsub!(/\[backcolor=[^\]]*?\]/i, '')
    s.gsub!(/\[\/backcolor\]/i, '')

    # Remove the size tag
    s.gsub!(/\[size=[^\]]*?\]/i, '')
    s.gsub!(/\[\/size\]/i, '')

    # Remove the color tag
    s.gsub!(/\[color=[^\]]*?\]/i, '')
    s.gsub!(/\[\/color\]/i, '')

    # Remove the hide tag
    s.gsub!(/\[\/?hide\]/i, '')
    s.gsub!(/\[\/?free[^\]]*\]/i, "\n")

    # Replace the align and float tags with line breaks
    s.gsub!(/\[align=[^\]]*?\]/i, "\n")
    s.gsub!(/\[\/align\]/i, "\n")
    s.gsub!(/\[float=[^\]]*?\]/i, "\n")
    s.gsub!(/\[\/float\]/i, "\n")

    # Convert code
    s.gsub!(/\[\/?code\]/i, "\n```\n")

    # The edit notice should be removed
    # example (literal text found in posts): 本帖最后由 Helloworld 于 2015-1-28 22:05 编辑
    s.gsub!(/\[i=s\] 本帖最后由[\s\S]*?编辑 \[\/i\]/, '')

    # Convert the custom smileys to emojis
    # `{:cry:}` to `:cry:`
    s.gsub!(/\{(\:\S*?\:)\}/, '\1')

    # convert list tags to ul and list=1 tags to ol
    # (basically, we're only missing list=a here...)
    s.gsub!(/\[list\](.*?)\[\/list:u\]/m, '[ul]\1[/ul]')
    s.gsub!(/\[list=1\](.*?)\[\/list:o\]/m, '[ol]\1[/ol]')
    # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
    s.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')

    # Discuz can create PM out of a post, which will generates like
    # [url=http://example.com/forum.php?mod=redirect&goto=findpost&pid=111&ptid=11][b]关于您在“主题名称”的帖子[/b][/url]
    # pm_url_regexp captures only the topic id; the other arguments are passed
    # explicitly as nil because replace_internal_link takes five parameters.
    s.gsub!(pm_url_regexp) do |discuzx_link|
      replace_internal_link(discuzx_link, Regexp.last_match(1).to_i, nil, nil, nil)
    end

    # [url][b]text[/b][/url] to **[url]text[/url]**
    s.gsub!(/(\[url=[^\[\]]*?\])\[b\](\S*)\[\/b\](\[\/url\])/, '**\1\2\3**')

    # Replace internal forum links
    @internal_url_regexps.each do |internal_url_regexp|
      s.gsub!(internal_url_regexp) do |discuzx_link|
        match = Regexp.last_match
        # nil when the pattern has no such group or the group did not take part
        capture = lambda { |name| match.names.include?(name) ? match[name] : nil }
        tid = capture.call('tid')
        pid = capture.call('pid')
        fid = capture.call('fid')
        # the misc.php pattern names its group `mod`; it carries the action too
        action = capture.call('action') || capture.call('mod')
        replace_internal_link(discuzx_link, tid && tid.to_i, pid && pid.to_i, fid && fid.to_i, action)
      end
    end

    # @someone without the url
    s.gsub!(/@\[url=[^\[\]]*?\](\S*)\[\/url\]/i, '@\1')

    s.scan(/http(?:s)?:\/\/#{site}\/[^\[\]\s]*/) { |link| puts "WARNING: post #{import_id} can't replace internal url #{link}" }

    s.strip
  end

  # Maps a link to the old forum onto its Discourse equivalent.
  #
  # Lookup order: imported post, imported topic (through its first post),
  # imported category, then the action name. The original link is returned
  # whenever nothing can be resolved.
  #
  # @param discuzx_link [String] the link as found in the post
  # @param import_topic_id [Integer, nil] old topic id (tid)
  # @param import_post_id [Integer, nil] old post id (pid)
  # @param import_category_id [Integer, nil] old forum id (fid)
  # @param action [String, nil] 'index', 'stat', 'stats' or 'ranklist'
  # @return [String] the new URL, or +discuzx_link+ unchanged
  #
  # The trailing parameters default to nil so that callers that only know the
  # topic id can omit them.
  def replace_internal_link(discuzx_link, import_topic_id = nil, import_post_id = nil, import_category_id = nil, action = nil)
    if import_post_id
      post_id = post_id_from_imported_post_id import_post_id
      if post_id
        # find_by returns nil for a missing record, so the check below is meaningful
        post = Post.find_by(id: post_id)
        return post.full_url if post
      end
    end

    if import_topic_id

      results = mysql_query("SELECT pid
                               FROM #{table_name 'forum_post'}
                              WHERE tid = #{import_topic_id} AND first
                              LIMIT 1")

      return discuzx_link unless results.size > 0

      linked_post_id = results.first['pid']
      lookup = topic_lookup_from_imported_post_id(linked_post_id)

      if lookup
        return "#{NEW_SITE_PREFIX}#{lookup[:url]}"
      else
        return discuzx_link
      end

    end

    if import_category_id
      category_id = category_id_from_imported_category_id import_category_id
      if category_id
        category = Category.find_by(id: category_id)
        return category.url if category
      end
    end

    case action
    when 'index'
      return "#{NEW_SITE_PREFIX}/"
    when 'stat', 'stats', 'ranklist'
      return "#{NEW_SITE_PREFIX}/users"
    end

    discuzx_link
  end

  # Memoized pattern for the "findpost" redirect links Discuz puts into private
  # messages created from a post. Capture group 1 is the topic id (ptid).
  def pm_url_regexp
    return @pm_url_regexp if @pm_url_regexp

    escaped_prefix = ORIGINAL_SITE_PREFIX.gsub('.', '\.')
    pattern = "http(?:s)?://" + escaped_prefix + "/forum\\.php\\?mod=redirect&goto=findpost&pid=\\d+&ptid=(\\d+)"
    @pm_url_regexp = Regexp.new(pattern)
  end

  # This step is done separately because it can take multiple attempts to get right (because of
  # missing files, wrong paths, authorized extensions, etc.).
  #
  # For every imported forum post (numeric import_id) this
  #   1. replaces inline [attach]ID[/attach] tags with the uploaded file,
  #   2. replaces [x-attach]PATH[/x-attach] markers (written by
  #      process_discuzx_post) with an upload of the file under DISCUZX_BASE_DIR,
  #   3. appends every remaining attachment of the post that was not inline,
  # and revises the post when its raw text changed.
  def import_attachments
    setting = AUTHORIZED_EXTENSIONS.join('|')
    SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions

    attachment_regex = /\[attach\](\d+)\[\/attach\]/
    # Non-greedy: several markers on one line must not be merged into one path.
    attachment_link_regex = /\[x-attach\](.+?)\[\/x-attach\]/

    current_count = 0
    total_count = mysql_query("SELECT count(*) count FROM #{table_name 'forum_post'};").first['count']

    success_count = 0
    fail_count = 0

    puts '', "Importing attachments...", ''

    Post.find_each do |post|
      # Only forum posts have a purely numeric import id (private messages use "pm:<id>").
      next unless post.custom_fields['import_id'] == post.custom_fields['import_id'].to_i.to_s

      user = post.user

      current_count += 1
      print_status current_count, total_count

      new_raw = post.raw.dup

      inline_attachments = []

      new_raw.gsub!(attachment_regex) do |s|
        attachment_id = Regexp.last_match(1).to_i
        inline_attachments.push attachment_id

        upload, filename = find_upload(user, post, attachment_id)
        unless upload
          fail_count += 1
          # `next` yields nil, so the tag is replaced by an empty string
          next
        end

        html_for_upload(upload, filename)
      end
      new_raw.gsub!(attachment_link_regex) do |s|
        attachment_file = Regexp.last_match(1)

        filename = File.basename(attachment_file)
        upload = create_upload(user.id, File.join(DISCUZX_BASE_DIR, attachment_file), filename)
        unless upload
          fail_count += 1
          next
        end

        html_for_upload(upload, filename)
      end

      # Attachments of this post that were not referenced inline.
      sql = "SELECT aid
          FROM #{table_name 'forum_attachment'}
          WHERE pid = #{post.custom_fields['import_id']}"
      if !inline_attachments.empty?
        sql << " AND aid NOT IN (#{inline_attachments.join(',')})"
      end

      results = mysql_query(sql)

      results.each do |attachment|
        attachment_id = attachment['aid']
        upload, filename = find_upload(user, post, attachment_id)
        unless upload
          fail_count += 1
          next
        end
        html = html_for_upload(upload, filename)
        unless new_raw.include? html
          new_raw << "\n"
          new_raw << html
        end
      end

      if new_raw != post.raw
        PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: '从 Discuz 中导入附件')
      end

      success_count += 1
    end

    puts '', ''
    puts "succeeded: #{success_count}"
    puts "   failed: #{fail_count}" if fail_count > 0
    puts ''
  end

  # Create the full path to the discuz avatar specified from user id
  #
  # The user id is left-padded with zeros to at least nine characters and split
  # into directory segments of 3, 2 and 2 characters; the final two characters
  # prefix the file name ("NN_avatar_big.jpg").
  #
  # Returns a two-element array: [path, file_name]. The path is prefixed with
  # DISCUZX_BASE_DIR when `absolute` is truthy, otherwise it starts at AVATAR_DIR.
  def discuzx_avatar_fullpath(user_id, absolute = true)
    digits = user_id.to_s.rjust(9, '0')
    avatar_name = "#{digits[-2, 2]}_avatar_big.jpg"

    segments = [AVATAR_DIR, digits[0, 3], digits[3, 2], digits[5, 2], avatar_name]
    segments.unshift(DISCUZX_BASE_DIR) if absolute

    [File.join(*segments), avatar_name]
  end

  # post id is in the quote block
  #
  # Tries three patterns in order and returns the captured pid as a String:
  #   1. a relative "forum.php?mod=redirect&goto=findpost" [url=...] tag;
  #   2. an absolute "redirect.php?goto=findpost" [url=...] tag on the old site;
  #   3. any "pid=<digits>" found inside a [quote]...[/quote] block.
  # Returns nil when none of them match.
  #
  # The site prefix and file names are matched literally: dots are escaped so
  # that "." cannot stand in for an arbitrary character.
  def find_post_id_by_quote_number(raw)
    case raw
    when /\[url=forum\.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+\]/ # standard
      $1
    when /\[url=https?:\/\/#{Regexp.escape(ORIGINAL_SITE_PREFIX)}\/redirect\.php\?goto=findpost&pid=(\d+)&ptid=\d+\]/ # old discuz 7 format
      $1
    when /\[quote\][\S\s]*pid=(\d+)[\S\s]*\[\/quote\]/ # quote
      $1
    end
  end

  # for some reason, discuz inlined some png file
  # the corresponding image stored is broken in a way
  #
  # Takes the text of an inline image (everything after the first
  # 'data:image/png;base64,'.length characters is treated as Base64), writes the
  # decoded bytes to a temporary .png file and uploads it as the system user.
  #
  # Returns [upload, real_filename] on success, where real_filename is a random
  # hex name ending in ".png". Returns nil when data is missing, too short to
  # contain a payload, or when the upload is nil or not valid.
  def upload_inline_image(data)
    return nil unless data

    puts 'Creating inline image'

    prefix_length = 'data:image/png;base64,'.length
    base64_payload = data[prefix_length..-1]
    unless base64_payload
      puts 'Error parsed inline photo', data[0..20]
      return
    end
    image_bytes = Base64.decode64(base64_payload)

    real_filename = "#{SecureRandom.hex}.png"
    tempfile = Tempfile.new(['inline', '.png'])
    upload =
      begin
        tempfile.binmode
        tempfile.write(image_bytes)
        # Rewind so the uploader reads the file from its beginning.
        tempfile.rewind

        create_upload(Discourse::SYSTEM_USER_ID, tempfile, real_filename)
      ensure
        # Best-effort cleanup; errors while closing/unlinking are ignored.
        tempfile.close rescue nil
        tempfile.unlink rescue nil
      end

    unless !upload.nil? && upload.valid?
      puts "Upload not valid :("
      puts upload.errors.inspect if upload
      return nil
    end

    [upload, real_filename]
  end

  # find the uploaded file and real name from the db
  #
  # Looks up attachment `upload_id` of `post` in two steps: the forum_attachment
  # table yields the `tableid` of the partition table
  # (forum_attachment_<tableid>) that holds the file metadata, then that table
  # yields the stored path and the original file name. The file is read from
  # DISCUZX_BASE_DIR + ATTACHMENT_DIR and uploaded on behalf of `user`.
  #
  # Returns [upload, real_filename] on success. Returns nil (after printing a
  # message) when a record or the file is missing, when the upload is nil or not
  # valid, or when a Mysql2::Error is raised.
  def find_upload(user, post, upload_id)
    attachment_table = table_name 'forum_attachment'
    # search for table id
    sql = "SELECT a.pid post_id,
                  a.aid upload_id,
                  a.tableid table_id
             FROM #{attachment_table} a
            WHERE a.pid = #{post.custom_fields['import_id']}
              AND a.aid = #{upload_id};"
    results = mysql_query(sql)

    unless (meta_data = results.first)
      puts "Couldn't find forum_attachment record meta data for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
      return nil
    end

    # search for uploaded file meta data
    sql = "SELECT a.pid post_id,
                  a.aid upload_id,
                  a.tid topic_id,
                  a.uid user_id,
                  a.dateline uploaded_time,
                  a.filename real_filename,
                  a.attachment attachment_path,
                  a.remote is_remote,
                  a.description description,
                  a.isimage is_image,
                  a.thumb is_thumb
             FROM #{attachment_table}_#{meta_data['table_id']} a
            WHERE a.aid = #{upload_id};"
    results = mysql_query(sql)

    unless (row = results.first)
      puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
      return nil
    end

    filename = File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, row['attachment_path'])
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    unless File.exist?(filename)
      puts "Attachment file doesn't exist: #{filename}"
      return nil
    end
    real_filename = row['real_filename']
    # A name that starts with a dot gets a random hex prefix.
    real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
    upload = create_upload(user.id, filename, real_filename)

    if upload.nil? || !upload.valid?
      puts "Upload not valid :("
      puts upload.errors.inspect if upload
      return nil
    end

    return upload, real_filename
  rescue Mysql2::Error => e
    puts "SQL Error"
    puts e.message
    puts sql
    return nil
  end

  # Returns the first argument that is not blank, or an empty string when every
  # argument is blank (or no arguments were given).
  def first_exists(*items)
    items.each do |candidate|
      return candidate unless candidate.blank?
    end
    ''
  end

  # Runs `sql` on the importer's MySQL client and returns the client's result.
  # Every query is issued with the `cache_rows: false` option.
  def mysql_query(sql)
    query_options = { cache_rows: false }
    @client.query(sql, **query_options)
  end
end

# Script entry point: build the importer and run it as soon as this file is loaded.
ImportScripts::DiscuzX.new.perform