# frozen_string_literal: true require 'mysql2' require File.expand_path(File.dirname(__FILE__) + "/base.rb") require 'htmlentities' class ImportScripts::Modx < ImportScripts::Base BATCH_SIZE = 1000 # CHANGE THESE BEFORE RUNNING THE IMPORTER DB_HOST ||= ENV['DB_HOST'] || "localhost" DB_NAME ||= ENV['DB_NAME'] || "modx" DB_PW ||= ENV['DB_PW'] || "modex" DB_USER ||= ENV['DB_USER'] || "modx" TIMEZONE ||= ENV['TIMEZONE'] || "America/Los_Angeles" TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "modx_" ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/path/to/your/attachment/folder' RANDOM_CATEGORY_COLOR ||= !ENV['RANDOM_CATEGORY_COLOR'].nil? SUSPEND_ALL_USERS ||= !ENV['SUSPEND_ALL_USERS'] # TODO: replace modx_ with #{TABLE_PREFIX} puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}" def initialize super SiteSetting.disable_emails = "non-staff" @old_username_to_new_usernames = {} @tz = TZInfo::Timezone.get(TIMEZONE) @htmlentities = HTMLEntities.new @client = Mysql2::Client.new( host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME ) rescue Exception => e puts '=' * 50 puts e.message puts <<~TEXT Cannot connect in to database. Hostname: #{DB_HOST} Username: #{DB_USER} Password: #{DB_PW} database: #{DB_NAME} Edit the script or set these environment variables: export DB_HOST="localhost" export DB_NAME="modx" export DB_PW="modx" export DB_USER="modx" export TABLE_PREFIX="modx_" export ATTACHMENT_DIR '/path/to/your/attachment/folder' Exiting. TEXT exit end def execute import_users import_categories import_topics_and_posts deactivate_all_users suspend_users end def not_import_groups puts "", "importing groups..." groups = mysql_query <<-SQL SELECT usergroupid, title FROM #{TABLE_PREFIX}usergroup ORDER BY usergroupid SQL create_groups(groups) do |group| { id: group["usergroupid"], name: @htmlentities.decode(group["title"]).strip } end end def import_users puts "", "importing users" user_count = mysql_query("SELECT COUNT(id) count FROM #{TABLE_PREFIX}discuss_users").first["count"] last_user_id = -1 batches(BATCH_SIZE) do |offset| users = mysql_query(<<-SQL SELECT id as userid, email, concat (name_first, " ", name_last) as name, username, location, website, status, last_active as last_seen_at, createdon as created_at, birthdate as date_of_birth, avatar as avatar_url, password, salt, display_name, use_display_name FROM #{TABLE_PREFIX}discuss_users WHERE id > #{last_user_id} ORDER BY id LIMIT #{BATCH_SIZE}; SQL ).to_a break if users.empty? last_user_id = users[-1]["userid"] before = users.size users.reject! { |u| @lookup.user_already_imported?(u["userid"].to_i) } create_users(users, total: user_count, offset: offset) do |user| { id: user["userid"], name: user['name'], username: user['username'], email: user['email'], website: user['website'], created_at: parse_timestamp(user["created_at"]), last_seen_at: parse_timestamp(user["last_seen_at"]), date_of_birth: user['date_of_birth'], password: "#{user['password']}:#{user['salt']}" # not tested } end end end def import_categories # import modx_discuss_categories as categories # import modx_discuss_boards as subcategories puts "", "importing categories..." categories = mysql_query("select id, name, description from modx_discuss_categories").to_a create_categories(categories) do |category| puts "Creating #{category['name']}" puts category { id: "cat#{category['id']}", name: category["name"], color: RANDOM_CATEGORY_COLOR ? (0..2).map { "%0x" % (rand * 0x80) }.join : nil, description: category["description"] } end puts "", "importing boards as subcategories..." boards = mysql_query("select id, category, name, description from modx_discuss_boards;").to_a create_categories(boards) do |category| puts category parent_category_id = category_id_from_imported_category_id("cat#{category['category']}") { id: category["id"], parent_category_id: parent_category_id.to_s, name: category["name"], color: RANDOM_CATEGORY_COLOR ? (0..2).map { "%0x" % (rand * 0x80) }.join : nil, description: category["description"] } end end def import_topics_and_posts puts "", "creating topics and posts" total_count = mysql_query("SELECT count(id) count from #{TABLE_PREFIX}discuss_posts").first["count"] topic_first_post_id = {} batches(BATCH_SIZE) do |offset| results = mysql_query(" SELECT id, thread topic_id, board category_id, title, message raw, parent, author user_id, createdon created_at from modx_discuss_posts ORDER BY createdon LIMIT #{BATCH_SIZE} OFFSET #{offset}; ") break if results.size < 1 next if all_records_exist? :posts, results.map { |m| m['id'].to_i } create_posts(results, total: total_count, offset: offset) do |m| skip = false mapped = {} mapped[:id] = m['id'] mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 mapped[:raw] = post_process_raw(m['raw']) mapped[:created_at] = Time.zone.at(m['created_at']) if m['parent'] == 0 mapped[:category] = category_id_from_imported_category_id(m['category_id']) mapped[:title] = m['title'] topic_first_post_id[m['topic_id']] = m['id'] else parent = topic_lookup_from_imported_post_id(topic_first_post_id[m['topic_id']]) if parent mapped[:topic_id] = parent[:topic_id] else puts "Parent post #{first_post_id} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" skip = true end end skip ? nil : mapped end end end def post_process_raw(raw) # [QUOTE]...[/QUOTE] raw = raw.gsub(/\[quote.*?\](.+?)\[\/quote\]/im) { |quote| quote = quote.gsub(/\[quote author=(.*?) .+\]/i) { "\n[quote=\"#{$1}\"]\n" } quote = quote.gsub(/[^\n]\[\/quote\]/im) { "\n[/quote]\n" } } raw end def not_mark_topics_as_solved puts "", "Marking topics as solved..." DB.exec <<~SQL INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at) SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at FROM post_custom_fields pcf JOIN posts p ON p.id = pcf.post_id WHERE pcf.name = 'is_accepted_answer' SQL end # find the uploaded file information from the db def not_find_upload(post, attachment_id) sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename, a.caption caption FROM #{TABLE_PREFIX}attachment a WHERE a.attachmentid = #{attachment_id}" results = mysql_query(sql) unless row = results.first puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}" return end filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach") unless File.exist?(filename) puts "Attachment file doesn't exist: #{filename}" return end real_filename = row['filename'] real_filename.prepend SecureRandom.hex if real_filename[0] == '.' upload = create_upload(post.user.id, filename, real_filename) if upload.nil? || !upload.valid? puts "Upload not valid :(" puts upload.errors.inspect if upload return end [upload, real_filename] rescue Mysql2::Error => e puts "SQL Error" puts e.message puts sql end def not_import_private_messages puts "", "importing private messages..." topic_count = mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"] last_private_message_id = -1 batches(BATCH_SIZE) do |offset| private_messages = mysql_query(<<-SQL SELECT pmtextid, fromuserid, title, message, touserarray, dateline FROM #{TABLE_PREFIX}pmtext WHERE pmtextid > #{last_private_message_id} ORDER BY pmtextid LIMIT #{BATCH_SIZE} SQL ).to_a break if private_messages.empty? last_private_message_id = private_messages[-1]["pmtextid"] private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm['pmtextid']}") } title_username_of_pm_first_post = {} create_posts(private_messages, total: topic_count, offset: offset) do |m| skip = false mapped = {} mapped[:id] = "pm-#{m['pmtextid']}" mapped[:user_id] = user_id_from_imported_user_id(m['fromuserid']) || Discourse::SYSTEM_USER_ID mapped[:raw] = preprocess_post_raw(m['message']) rescue nil mapped[:created_at] = Time.zone.at(m['dateline']) title = @htmlentities.decode(m['title']).strip[0...255] topic_id = nil next if mapped[:raw].blank? # users who are part of this private message. target_usernames = [] target_userids = [] begin to_user_array = PHP.unserialize(m['touserarray']) rescue puts "#{m['pmtextid']} -- #{m['touserarray']}" skip = true end begin to_user_array.each do |to_user| if to_user[0] == "cc" || to_user[0] == "bcc" # not sure if we should include bcc users to_user[1].each do |to_user_cc| user_id = user_id_from_imported_user_id(to_user_cc[0]) username = User.find_by(id: user_id).try(:username) target_userids << user_id || Discourse::SYSTEM_USER_ID target_usernames << username if username end else user_id = user_id_from_imported_user_id(to_user[0]) username = User.find_by(id: user_id).try(:username) target_userids << user_id || Discourse::SYSTEM_USER_ID target_usernames << username if username end end rescue puts "skipping pm-#{m['pmtextid']} `to_user_array` is not properly serialized -- #{to_user_array.inspect}" skip = true end participants = target_userids participants << mapped[:user_id] begin participants.sort! rescue puts "one of the participant's id is nil -- #{participants.inspect}" end if title =~ /^Re:/ parent_id = title_username_of_pm_first_post[[title[3..-1], participants]] || title_username_of_pm_first_post[[title[4..-1], participants]] || title_username_of_pm_first_post[[title[5..-1], participants]] || title_username_of_pm_first_post[[title[6..-1], participants]] || title_username_of_pm_first_post[[title[7..-1], participants]] || title_username_of_pm_first_post[[title[8..-1], participants]] if parent_id if t = topic_lookup_from_imported_post_id("pm-#{parent_id}") topic_id = t[:topic_id] end end else title_username_of_pm_first_post[[title, participants]] ||= m['pmtextid'] end unless topic_id mapped[:title] = title mapped[:archetype] = Archetype.private_message mapped[:target_usernames] = target_usernames.join(',') if mapped[:target_usernames].size < 1 # pm with yourself? # skip = true mapped[:target_usernames] = "system" puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})" end else mapped[:topic_id] = topic_id end skip ? nil : mapped end end end def not_import_attachments puts '', 'importing attachments...' current_count = 0 total_count = mysql_query(<<-SQL SELECT COUNT(postid) count FROM #{TABLE_PREFIX}post p JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid WHERE t.firstpostid <> p.postid SQL ).first["count"] success_count = 0 fail_count = 0 attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i Post.find_each do |post| current_count += 1 print_status current_count, total_count new_raw = post.raw.dup new_raw.gsub!(attachment_regex) do |s| matches = attachment_regex.match(s) attachment_id = matches[1] upload, filename = find_upload(post, attachment_id) unless upload fail_count += 1 next end html_for_upload(upload, filename) end if new_raw != post.raw PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from modx') end success_count += 1 end end def not_close_topics puts "", "Closing topics..." # keep track of closed topics closed_topic_ids = [] topics = mysql_query <<-SQL SELECT t.threadid threadid, firstpostid, open FROM #{TABLE_PREFIX}thread t JOIN #{TABLE_PREFIX}post p ON p.postid = t.firstpostid ORDER BY t.threadid SQL topics.each do |topic| topic_id = "thread-#{topic["threadid"]}" closed_topic_ids << topic_id if topic["open"] == 0 end sql = <<-SQL WITH closed_topic_ids AS ( SELECT t.id AS topic_id FROM post_custom_fields pcf JOIN posts p ON p.id = pcf.post_id JOIN topics t ON t.id = p.topic_id WHERE pcf.name = 'import_id' AND pcf.value IN (?) ) UPDATE topics SET closed = true WHERE id IN (SELECT topic_id FROM closed_topic_ids) SQL DB.exec(sql, closed_topic_ids) end def not_post_process_posts puts "", "Postprocessing posts..." current = 0 max = Post.count Post.find_each do |post| begin new_raw = postprocess_post_raw(post.raw) if new_raw != post.raw post.raw = new_raw post.save end rescue PrettyText::JavaScriptError nil ensure print_status(current += 1, max) end end end def not_create_permalink_file puts '', 'Creating Permalink File...', '' id_mapping = [] Topic.listable_topics.find_each do |topic| pcf = topic.first_post.custom_fields if pcf && pcf["import_id"] id = pcf["import_id"].split('-').last id_mapping.push("XXX#{id} YYY#{topic.id}") end end # Category.find_each do |cat| # ccf = cat.custom_fields # if ccf && ccf["import_id"] # id = ccf["import_id"].to_i # id_mapping.push("/forumdisplay.php?#{id} http://forum.quartertothree.com#{cat.url}") # end # end CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv| id_mapping.each do |value| csv << [value] end end end def deactivate_all_users User.where("id > 0 and admin != true").update_all(active: true) end def suspend_users puts '', "updating blocked users" banned = 0 failed = 0 total = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}user_attributes where blocked != 0").first['count'] system_user = Discourse.system_user mysql_query("SELECT id, blockedafter, blockeduntil FROM #{TABLE_PREFIX}user_attributes").each do |b| user = User.find_by_id(user_id_from_imported_user_id(b['id'])) if user user.suspended_at = parse_timestamp(user["blockedafter"]) user.suspended_till = parse_timestamp(user["blockeduntil"]) if user.save StaffActionLogger.new(system_user).log_user_suspend(user, "banned during initial import") banned += 1 else puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}" failed += 1 end else puts "Not found: #{b['userid']}" failed += 1 end print_status banned + failed, total end end def parse_timestamp(timestamp) Time.zone.at(@tz.utc_to_local(timestamp)) end def mysql_query(sql) @client.query(sql, cache_rows: true) end end ImportScripts::Modx.new.perform