From 110512d4d0102f107047a2a66fed4829f87d681b Mon Sep 17 00:00:00 2001 From: Arpit Jalan Date: Thu, 11 Apr 2019 12:35:19 +0530 Subject: [PATCH] Improvements to vBulletin bulk import script - import attachments - import avatars - import user signatures - create permalink file - reconnect to MySQL db in case of failure --- script/bulk_import/vbulletin.rb | 225 +++++++++++++++++++++++++++++++- 1 file changed, 224 insertions(+), 1 deletion(-) diff --git a/script/bulk_import/vbulletin.rb b/script/bulk_import/vbulletin.rb index 7361edb63b0..a16990053bf 100644 --- a/script/bulk_import/vbulletin.rb +++ b/script/bulk_import/vbulletin.rb @@ -7,6 +7,8 @@ class BulkImport::VBulletin < BulkImport::Base TABLE_PREFIX = "vb_" SUSPENDED_TILL ||= Date.new(3000, 1, 1) + ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/shared/import/data/attachments' + AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars' def initialize super @@ -25,7 +27,8 @@ class BulkImport::VBulletin < BulkImport::Base username: username, password: password, database: database, - encoding: charset + encoding: charset, + reconnect: true ) @client.query_options.merge!(as: :array, cache_rows: false) @@ -41,6 +44,15 @@ class BulkImport::VBulletin < BulkImport::Base end def execute + # enable as per requirement: + # SiteSetting.automatic_backups_enabled = false + # SiteSetting.disable_emails = "non-staff" + # SiteSetting.authorized_extensions = '*' + # SiteSetting.max_image_size_kb = 102400 + # SiteSetting.max_attachment_size_kb = 102400 + # SiteSetting.clean_up_uploads = false + # SiteSetting.clean_orphan_uploads_grace_period_hours = 43200 + import_groups import_users import_group_users @@ -61,6 +73,11 @@ class BulkImport::VBulletin < BulkImport::Base import_private_topics import_topic_allowed_users import_private_posts + + create_permalink_file + import_attachments + import_avatars + import_signatures end def import_groups @@ -462,6 +479,201 @@ class BulkImport::VBulletin < BulkImport::Base end end + def create_permalink_file + puts '', 'Creating Permalink File...', '' + + id_mapping = [] + + Topic.listable_topics.find_each do |topic| + pcf = topic.first_post.custom_fields + if pcf && pcf["import_id"] + id = pcf["import_id"].split('-').last + id_mapping.push("XXX#{id} YYY#{topic.id}") + end + end + + # Category.find_each do |cat| + # ccf = cat.custom_fields + # if ccf && ccf["import_id"] + # id = ccf["import_id"].to_i + # id_mapping.push("/forumdisplay.php?#{id} http://forum.quartertothree.com#{cat.url}") + # end + # end + + CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv| + id_mapping.each do |value| + csv << [value] + end + end + end + + # find the uploaded file information from the db + def find_upload(post, attachment_id) + sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename, + a.filedata filedata, a.extension extension + FROM #{TABLE_PREFIX}attachment a + WHERE a.attachmentid = #{attachment_id}" + results = mysql_query(sql) + + unless row = results.first + puts "Couldn't find attachment record for attachment_id = #{attachment_id} post.id = #{post.id}" + return + end + + attachment_id = row[0] + user_id = row[1] + db_filename = row[2] + + filename = File.join(ATTACHMENT_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach") + real_filename = db_filename + real_filename.prepend SecureRandom.hex if real_filename[0] == '.' + + unless File.exists?(filename) + puts "Attachment file #{row.inspect} doesn't exist" + return nil + end + + upload = create_upload(post.user.id, filename, real_filename) + + if upload.nil? || !upload.valid? + puts "Upload not valid :(" + puts upload.errors.inspect if upload + return + end + + [upload, real_filename] + rescue Mysql2::Error => e + puts "SQL Error" + puts e.message + puts sql + end + + def import_attachments + puts '', 'importing attachments...' + + RateLimiter.disable + current_count = 0 + + total_count = mysql_query(<<-SQL + SELECT COUNT(p.postid) count + FROM #{TABLE_PREFIX}post p + JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid + WHERE t.firstpostid <> p.postid + SQL + ).first[0].to_i + + success_count = 0 + fail_count = 0 + + attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i + + Post.find_each do |post| + current_count += 1 + print_status current_count, total_count + + new_raw = post.raw.dup + new_raw.gsub!(attachment_regex) do |s| + matches = attachment_regex.match(s) + attachment_id = matches[1] + + upload, filename = find_upload(post, attachment_id) + unless upload + fail_count += 1 + next + # should we strip invalid attach tags? + end + + html_for_upload(upload, filename) + end + + if new_raw != post.raw + PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin') + end + + success_count += 1 + end + + puts "", "imported #{success_count} attachments... failed: #{fail_count}." + RateLimiter.enable + end + + def import_avatars + if AVATAR_DIR && File.exists?(AVATAR_DIR) + puts "", "importing user avatars" + + RateLimiter.disable + start = Time.now + count = 0 + + Dir.foreach(AVATAR_DIR) do |item| + print "\r%7d - %6d/sec".freeze % [count, count.to_f / (Time.now - start)] + + next if item == ('.') || item == ('..') || item == ('.DS_Store') + next unless item =~ /avatar(\d+)_(\d).gif/ + scan = item.scan(/avatar(\d+)_(\d).gif/) + next unless scan[0][0].present? + u = UserCustomField.find_by(name: "import_id", value: scan[0][0]).try(:user) + next unless u.present? + # raise "User not found for id #{user_id}" if user.blank? + + photo_real_filename = File.join(AVATAR_DIR, item) + puts "#{photo_real_filename} not found" unless File.exists?(photo_real_filename) + + upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename)) + count += 1 + if upload.persisted? + u.import_mode = false + u.create_user_avatar + u.import_mode = true + u.user_avatar.update(custom_upload_id: upload.id) + u.update(uploaded_avatar_id: upload.id) + else + puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!" + end + end + + puts "", "imported #{count} avatars..." + RateLimiter.enable + end + end + + def import_signatures + puts "Importing user signatures..." + + total_count = mysql_query(<<-SQL + SELECT COUNT(userid) count + FROM #{TABLE_PREFIX}sigparsed + SQL + ).first[0].to_i + current_count = 0 + + user_signatures = mysql_stream <<-SQL + SELECT userid, signatureparsed + FROM #{TABLE_PREFIX}sigparsed + ORDER BY userid + SQL + + user_signatures.each do |sig| + current_count += 1 + print_status current_count, total_count + user_id = sig[0] + user_sig = sig[1] + next unless user_id.present? && user_sig.present? + + u = UserCustomField.find_by(name: "import_id", value: user_id).try(:user) + next unless u.present? + + # can not hold dupes + UserCustomField.where(user_id: u.id, name: ["see_signatures", "signature_raw", "signature_cooked"]).destroy_all + + user_sig.gsub!(/\[\/?sigpic\]/i, "") + + UserCustomField.create!(user_id: u.id, name: "see_signatures", value: true) + UserCustomField.create!(user_id: u.id, name: "signature_raw", value: user_sig) + UserCustomField.create!(user_id: u.id, name: "signature_cooked", value: PrettyText.cook(user_sig, omit_nofollow: false)) + end + end + def extract_pm_title(title) normalize_text(title).scrub.gsub(/^Re\s*:\s*/i, "") end @@ -473,6 +685,17 @@ class BulkImport::VBulletin < BulkImport::Base date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth end + def print_status(current, max, start_time = nil) + if start_time.present? + elapsed_seconds = Time.now - start_time + elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60] + else + elements_per_minute = '' + end + + print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute] + end + def mysql_stream(sql) @client.query(sql, stream: true) end