mirror of
https://github.com/discourse/discourse.git
synced 2025-02-22 12:17:12 +00:00
Improvements to vBulletin bulk import script
- import attachments - import avatars - import user signatures - create permalink file - reconnect to MySQL db in case of failure
This commit is contained in:
parent
318aba1f53
commit
110512d4d0
@ -7,6 +7,8 @@ class BulkImport::VBulletin < BulkImport::Base
|
|||||||
|
|
||||||
TABLE_PREFIX = "vb_"
|
TABLE_PREFIX = "vb_"
|
||||||
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
|
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
|
||||||
|
ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/shared/import/data/attachments'
|
||||||
|
AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
super
|
super
|
||||||
@ -25,7 +27,8 @@ class BulkImport::VBulletin < BulkImport::Base
|
|||||||
username: username,
|
username: username,
|
||||||
password: password,
|
password: password,
|
||||||
database: database,
|
database: database,
|
||||||
encoding: charset
|
encoding: charset,
|
||||||
|
reconnect: true
|
||||||
)
|
)
|
||||||
|
|
||||||
@client.query_options.merge!(as: :array, cache_rows: false)
|
@client.query_options.merge!(as: :array, cache_rows: false)
|
||||||
@ -41,6 +44,15 @@ class BulkImport::VBulletin < BulkImport::Base
|
|||||||
end
|
end
|
||||||
|
|
||||||
def execute
|
def execute
|
||||||
|
# enable as per requirement:
|
||||||
|
# SiteSetting.automatic_backups_enabled = false
|
||||||
|
# SiteSetting.disable_emails = "non-staff"
|
||||||
|
# SiteSetting.authorized_extensions = '*'
|
||||||
|
# SiteSetting.max_image_size_kb = 102400
|
||||||
|
# SiteSetting.max_attachment_size_kb = 102400
|
||||||
|
# SiteSetting.clean_up_uploads = false
|
||||||
|
# SiteSetting.clean_orphan_uploads_grace_period_hours = 43200
|
||||||
|
|
||||||
import_groups
|
import_groups
|
||||||
import_users
|
import_users
|
||||||
import_group_users
|
import_group_users
|
||||||
@ -61,6 +73,11 @@ class BulkImport::VBulletin < BulkImport::Base
|
|||||||
import_private_topics
|
import_private_topics
|
||||||
import_topic_allowed_users
|
import_topic_allowed_users
|
||||||
import_private_posts
|
import_private_posts
|
||||||
|
|
||||||
|
create_permalink_file
|
||||||
|
import_attachments
|
||||||
|
import_avatars
|
||||||
|
import_signatures
|
||||||
end
|
end
|
||||||
|
|
||||||
def import_groups
|
def import_groups
|
||||||
@ -462,6 +479,201 @@ class BulkImport::VBulletin < BulkImport::Base
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def create_permalink_file
|
||||||
|
puts '', 'Creating Permalink File...', ''
|
||||||
|
|
||||||
|
id_mapping = []
|
||||||
|
|
||||||
|
Topic.listable_topics.find_each do |topic|
|
||||||
|
pcf = topic.first_post.custom_fields
|
||||||
|
if pcf && pcf["import_id"]
|
||||||
|
id = pcf["import_id"].split('-').last
|
||||||
|
id_mapping.push("XXX#{id} YYY#{topic.id}")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Category.find_each do |cat|
|
||||||
|
# ccf = cat.custom_fields
|
||||||
|
# if ccf && ccf["import_id"]
|
||||||
|
# id = ccf["import_id"].to_i
|
||||||
|
# id_mapping.push("/forumdisplay.php?#{id} http://forum.quartertothree.com#{cat.url}")
|
||||||
|
# end
|
||||||
|
# end
|
||||||
|
|
||||||
|
CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv|
|
||||||
|
id_mapping.each do |value|
|
||||||
|
csv << [value]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# find the uploaded file information from the db
|
||||||
|
def find_upload(post, attachment_id)
|
||||||
|
sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename,
|
||||||
|
a.filedata filedata, a.extension extension
|
||||||
|
FROM #{TABLE_PREFIX}attachment a
|
||||||
|
WHERE a.attachmentid = #{attachment_id}"
|
||||||
|
results = mysql_query(sql)
|
||||||
|
|
||||||
|
unless row = results.first
|
||||||
|
puts "Couldn't find attachment record for attachment_id = #{attachment_id} post.id = #{post.id}"
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
attachment_id = row[0]
|
||||||
|
user_id = row[1]
|
||||||
|
db_filename = row[2]
|
||||||
|
|
||||||
|
filename = File.join(ATTACHMENT_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach")
|
||||||
|
real_filename = db_filename
|
||||||
|
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
|
||||||
|
|
||||||
|
unless File.exists?(filename)
|
||||||
|
puts "Attachment file #{row.inspect} doesn't exist"
|
||||||
|
return nil
|
||||||
|
end
|
||||||
|
|
||||||
|
upload = create_upload(post.user.id, filename, real_filename)
|
||||||
|
|
||||||
|
if upload.nil? || !upload.valid?
|
||||||
|
puts "Upload not valid :("
|
||||||
|
puts upload.errors.inspect if upload
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
[upload, real_filename]
|
||||||
|
rescue Mysql2::Error => e
|
||||||
|
puts "SQL Error"
|
||||||
|
puts e.message
|
||||||
|
puts sql
|
||||||
|
end
|
||||||
|
|
||||||
|
def import_attachments
|
||||||
|
puts '', 'importing attachments...'
|
||||||
|
|
||||||
|
RateLimiter.disable
|
||||||
|
current_count = 0
|
||||||
|
|
||||||
|
total_count = mysql_query(<<-SQL
|
||||||
|
SELECT COUNT(p.postid) count
|
||||||
|
FROM #{TABLE_PREFIX}post p
|
||||||
|
JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
|
||||||
|
WHERE t.firstpostid <> p.postid
|
||||||
|
SQL
|
||||||
|
).first[0].to_i
|
||||||
|
|
||||||
|
success_count = 0
|
||||||
|
fail_count = 0
|
||||||
|
|
||||||
|
attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i
|
||||||
|
|
||||||
|
Post.find_each do |post|
|
||||||
|
current_count += 1
|
||||||
|
print_status current_count, total_count
|
||||||
|
|
||||||
|
new_raw = post.raw.dup
|
||||||
|
new_raw.gsub!(attachment_regex) do |s|
|
||||||
|
matches = attachment_regex.match(s)
|
||||||
|
attachment_id = matches[1]
|
||||||
|
|
||||||
|
upload, filename = find_upload(post, attachment_id)
|
||||||
|
unless upload
|
||||||
|
fail_count += 1
|
||||||
|
next
|
||||||
|
# should we strip invalid attach tags?
|
||||||
|
end
|
||||||
|
|
||||||
|
html_for_upload(upload, filename)
|
||||||
|
end
|
||||||
|
|
||||||
|
if new_raw != post.raw
|
||||||
|
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin')
|
||||||
|
end
|
||||||
|
|
||||||
|
success_count += 1
|
||||||
|
end
|
||||||
|
|
||||||
|
puts "", "imported #{success_count} attachments... failed: #{fail_count}."
|
||||||
|
RateLimiter.enable
|
||||||
|
end
|
||||||
|
|
||||||
|
def import_avatars
|
||||||
|
if AVATAR_DIR && File.exists?(AVATAR_DIR)
|
||||||
|
puts "", "importing user avatars"
|
||||||
|
|
||||||
|
RateLimiter.disable
|
||||||
|
start = Time.now
|
||||||
|
count = 0
|
||||||
|
|
||||||
|
Dir.foreach(AVATAR_DIR) do |item|
|
||||||
|
print "\r%7d - %6d/sec".freeze % [count, count.to_f / (Time.now - start)]
|
||||||
|
|
||||||
|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
|
||||||
|
next unless item =~ /avatar(\d+)_(\d).gif/
|
||||||
|
scan = item.scan(/avatar(\d+)_(\d).gif/)
|
||||||
|
next unless scan[0][0].present?
|
||||||
|
u = UserCustomField.find_by(name: "import_id", value: scan[0][0]).try(:user)
|
||||||
|
next unless u.present?
|
||||||
|
# raise "User not found for id #{user_id}" if user.blank?
|
||||||
|
|
||||||
|
photo_real_filename = File.join(AVATAR_DIR, item)
|
||||||
|
puts "#{photo_real_filename} not found" unless File.exists?(photo_real_filename)
|
||||||
|
|
||||||
|
upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))
|
||||||
|
count += 1
|
||||||
|
if upload.persisted?
|
||||||
|
u.import_mode = false
|
||||||
|
u.create_user_avatar
|
||||||
|
u.import_mode = true
|
||||||
|
u.user_avatar.update(custom_upload_id: upload.id)
|
||||||
|
u.update(uploaded_avatar_id: upload.id)
|
||||||
|
else
|
||||||
|
puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
puts "", "imported #{count} avatars..."
|
||||||
|
RateLimiter.enable
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def import_signatures
|
||||||
|
puts "Importing user signatures..."
|
||||||
|
|
||||||
|
total_count = mysql_query(<<-SQL
|
||||||
|
SELECT COUNT(userid) count
|
||||||
|
FROM #{TABLE_PREFIX}sigparsed
|
||||||
|
SQL
|
||||||
|
).first[0].to_i
|
||||||
|
current_count = 0
|
||||||
|
|
||||||
|
user_signatures = mysql_stream <<-SQL
|
||||||
|
SELECT userid, signatureparsed
|
||||||
|
FROM #{TABLE_PREFIX}sigparsed
|
||||||
|
ORDER BY userid
|
||||||
|
SQL
|
||||||
|
|
||||||
|
user_signatures.each do |sig|
|
||||||
|
current_count += 1
|
||||||
|
print_status current_count, total_count
|
||||||
|
user_id = sig[0]
|
||||||
|
user_sig = sig[1]
|
||||||
|
next unless user_id.present? && user_sig.present?
|
||||||
|
|
||||||
|
u = UserCustomField.find_by(name: "import_id", value: user_id).try(:user)
|
||||||
|
next unless u.present?
|
||||||
|
|
||||||
|
# can not hold dupes
|
||||||
|
UserCustomField.where(user_id: u.id, name: ["see_signatures", "signature_raw", "signature_cooked"]).destroy_all
|
||||||
|
|
||||||
|
user_sig.gsub!(/\[\/?sigpic\]/i, "")
|
||||||
|
|
||||||
|
UserCustomField.create!(user_id: u.id, name: "see_signatures", value: true)
|
||||||
|
UserCustomField.create!(user_id: u.id, name: "signature_raw", value: user_sig)
|
||||||
|
UserCustomField.create!(user_id: u.id, name: "signature_cooked", value: PrettyText.cook(user_sig, omit_nofollow: false))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def extract_pm_title(title)
|
def extract_pm_title(title)
|
||||||
normalize_text(title).scrub.gsub(/^Re\s*:\s*/i, "")
|
normalize_text(title).scrub.gsub(/^Re\s*:\s*/i, "")
|
||||||
end
|
end
|
||||||
@ -473,6 +685,17 @@ class BulkImport::VBulletin < BulkImport::Base
|
|||||||
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
|
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def print_status(current, max, start_time = nil)
|
||||||
|
if start_time.present?
|
||||||
|
elapsed_seconds = Time.now - start_time
|
||||||
|
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
|
||||||
|
else
|
||||||
|
elements_per_minute = ''
|
||||||
|
end
|
||||||
|
|
||||||
|
print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
|
||||||
|
end
|
||||||
|
|
||||||
def mysql_stream(sql)
|
def mysql_stream(sql)
|
||||||
@client.query(sql, stream: true)
|
@client.query(sql, stream: true)
|
||||||
end
|
end
|
||||||
|
Loading…
x
Reference in New Issue
Block a user