Improve the VBulletin importer (#5922)

This commit is contained in:
discoursehosting 2018-06-12 20:41:21 +02:00 committed by Gerhard Schlager
parent e0096b0d1c
commit fc973f9363
1 changed file with 74 additions and 38 deletions

View File

@ -32,9 +32,11 @@ class ImportScripts::VBulletin < ImportScripts::Base
puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}" puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}"
def initialize def initialize
@bbcode_to_md = true
super super
@old_username_to_new_usernames = {} @usernames = {}
@tz = TZInfo::Timezone.get(TIMEZONE) @tz = TZInfo::Timezone.get(TIMEZONE)
@ -107,6 +109,14 @@ EOM
end end
end end
# Looks up +old_username+ in the @usernames hash and returns the value stored
# for it. When the hash has no entry for that key, the argument itself is
# returned unchanged.
def get_username_for_old_username(old_username)
  # fetch only falls back to the default when the key is absent, so a key
  # that is present is returned as stored (even if the stored value is nil).
  @usernames.fetch(old_username, old_username)
end
def import_users def import_users
puts "", "importing users" puts "", "importing users"
@ -116,7 +126,8 @@ EOM
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL users = mysql_query(<<-SQL
SELECT userid, username, homepage, usertitle, usergroupid, joindate, email SELECT userid, username, homepage, usertitle, usergroupid, joindate, email,
CONCAT(password, ':', salt) AS crypted_password
FROM #{TABLE_PREFIX}user FROM #{TABLE_PREFIX}user
WHERE userid > #{last_user_id} WHERE userid > #{last_user_id}
ORDER BY userid ORDER BY userid
@ -140,6 +151,7 @@ EOM
id: user["userid"], id: user["userid"],
name: username, name: username,
username: username, username: username,
password: user["crypted_password"],
email: email, email: email,
website: user["homepage"].strip, website: user["homepage"].strip,
title: @htmlentities.decode(user["usertitle"]).strip, title: @htmlentities.decode(user["usertitle"]).strip,
@ -147,13 +159,15 @@ EOM
created_at: parse_timestamp(user["joindate"]), created_at: parse_timestamp(user["joindate"]),
last_seen_at: parse_timestamp(user["lastvisit"]), last_seen_at: parse_timestamp(user["lastvisit"]),
post_create_action: proc do |u| post_create_action: proc do |u|
@old_username_to_new_usernames[user["username"]] = u.username
import_profile_picture(user, u) import_profile_picture(user, u)
import_profile_background(user, u) import_profile_background(user, u)
end end
} }
end end
end end
@usernames = UserCustomField.joins(:user).where(name: 'import_username').pluck('user_custom_fields.value', 'users.username').to_h
end end
def create_groups_membership def create_groups_membership
@ -321,16 +335,17 @@ EOM
t t
end end
# uncomment below lines to create permalink # Add the following to permalink_normalizations for this to work:
# topics.each do |thread| # /forum\/.*?\/(\d*)\-.*/thread/\1
# topic_id = "thread-#{thread["threadid"]}"
# topic = topic_lookup_from_imported_post_id(topic_id) topics.each do |thread|
# if topic.present? topic_id = "thread-#{thread["threadid"]}"
# title_slugified = thread["title"].gsub(" ","-").gsub(".","-") if thread["title"].present? topic = topic_lookup_from_imported_post_id(topic_id)
# url_slug = "threads/#{thread["threadid"]}-#{title_slugified}" if thread["title"].present? if topic.present?
# Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present? url_slug = "thread/#{thread["threadid"]}" if thread["title"].present?
# end Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present?
# end end
end
end end
end end
@ -388,8 +403,9 @@ EOM
# find the uploaded file information from the db # find the uploaded file information from the db
def find_upload(post, attachment_id) def find_upload(post, attachment_id)
sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename, sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
a.caption caption LENGTH(fd.filedata) AS dbsize, filedata, a.caption caption
FROM #{TABLE_PREFIX}attachment a FROM #{TABLE_PREFIX}attachment a
LEFT JOIN #{TABLE_PREFIX}filedata fd ON fd.filedataid = a.filedataid
WHERE a.attachmentid = #{attachment_id}" WHERE a.attachmentid = #{attachment_id}"
results = mysql_query(sql) results = mysql_query(sql)
@ -399,13 +415,22 @@ EOM
end end
filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach") filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach")
unless File.exists?(filename)
puts "Attachment file doesn't exist: #{filename}"
return
end
real_filename = row['filename'] real_filename = row['filename']
real_filename.prepend SecureRandom.hex if real_filename[0] == '.' real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
unless File.exists?(filename)
if row['dbsize'].to_i == 0
puts "Attachment file #{row['filedataid']} doesn't exist"
return nil
end
tmpfile = 'attach_' + row['filedataid'].to_s
filename = File.join('/tmp/', tmpfile)
File.open(filename, 'wb') { |f|
f.write(row['filedata'])
}
end
upload = create_upload(post.user.id, filename, real_filename) upload = create_upload(post.user.id, filename, real_filename)
if upload.nil? || !upload.valid? if upload.nil? || !upload.valid?
@ -620,8 +645,9 @@ EOM
Post.find_each do |post| Post.find_each do |post|
begin begin
old_raw = post.raw.dup
new_raw = postprocess_post_raw(post.raw) new_raw = postprocess_post_raw(post.raw)
if new_raw != post.raw if new_raw != old_raw
post.raw = new_raw post.raw = new_raw
post.save post.save
end end
@ -685,11 +711,8 @@ EOM
# [MENTION]<username>[/MENTION] # [MENTION]<username>[/MENTION]
raw.gsub!(/\[mention\](.+?)\[\/mention\]/i) do raw.gsub!(/\[mention\](.+?)\[\/mention\]/i) do
old_username = $1 new_username = get_username_for_old_username($1)
if @old_username_to_new_usernames.has_key?(old_username) "@#{new_username}"
old_username = @old_username_to_new_usernames[old_username]
end
"@#{old_username}"
end end
# [FONT=blah] and [COLOR=blah] # [FONT=blah] and [COLOR=blah]
@ -698,6 +721,7 @@ EOM
raw.gsub! /\[COLOR=#.*?\](.*?)\[\/COLOR\]/im, '\1' raw.gsub! /\[COLOR=#.*?\](.*?)\[\/COLOR\]/im, '\1'
raw.gsub! /\[SIZE=.*?\](.*?)\[\/SIZE\]/im, '\1' raw.gsub! /\[SIZE=.*?\](.*?)\[\/SIZE\]/im, '\1'
raw.gsub! /\[SUP\](.*?)\[\/SUP\]/im, '\1'
raw.gsub! /\[h=.*?\](.*?)\[\/h\]/im, '\1' raw.gsub! /\[h=.*?\](.*?)\[\/h\]/im, '\1'
# [CENTER]...[/CENTER] # [CENTER]...[/CENTER]
@ -705,10 +729,18 @@ EOM
# [INDENT]...[/INDENT] # [INDENT]...[/INDENT]
raw.gsub! /\[INDENT\](.*?)\[\/INDENT\]/im, '\1' raw.gsub! /\[INDENT\](.*?)\[\/INDENT\]/im, '\1'
raw.gsub! /\[TABLE\](.*?)\[\/TABLE\]/im, '\1'
raw.gsub! /\[TR\](.*?)\[\/TR\]/im, '\1' # Tables to MD
raw.gsub! /\[TD\](.*?)\[\/TD\]/im, '\1' raw.gsub!(/\[TABLE.*?\](.*?)\[\/TABLE\]/im) { |t|
raw.gsub! /\[TD="?.*?"?\](.*?)\[\/TD\]/im, '\1' rows = $1.gsub!(/\s*\[TR\](.*?)\[\/TR\]\s*/im) { |r|
cols = $1.gsub! /\s*\[TD.*?\](.*?)\[\/TD\]\s*/im, '|\1'
"#{cols}|\n"
}
header, rest = rows.split "\n", 2
c = header.count "|"
sep = "|---" * (c - 1)
"#{header}\n#{sep}|\n#{rest}\n"
}
# [QUOTE]...[/QUOTE] # [QUOTE]...[/QUOTE]
raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote| raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote|
@ -719,10 +751,8 @@ EOM
# [QUOTE=<username>]...[/QUOTE] # [QUOTE=<username>]...[/QUOTE]
raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
old_username, quote = $1, $2 old_username, quote = $1, $2
if @old_username_to_new_usernames.has_key?(old_username) new_username = get_username_for_old_username(old_username)
old_username = @old_username_to_new_usernames[old_username] "\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n"
end
"\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n"
end end
# [YOUTUBE]<id>[/YOUTUBE] # [YOUTUBE]<id>[/YOUTUBE]
@ -731,6 +761,9 @@ EOM
# [VIDEO=youtube;<id>]...[/VIDEO] # [VIDEO=youtube;<id>]...[/VIDEO]
raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" } raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }
# Fix uppercase B U and I tags
raw.gsub!(/(\[\/?[BUI]\])/i) { $1.downcase }
# More Additions .... # More Additions ....
# [spoiler=Some hidden stuff]SPOILER HERE!![/spoiler] # [spoiler=Some hidden stuff]SPOILER HERE!![/spoiler]
@ -760,16 +793,19 @@ EOM
raw.gsub!(/\[quote=([^;]+);(\d+)\](.+?)\[\/quote\]/im) do raw.gsub!(/\[quote=([^;]+);(\d+)\](.+?)\[\/quote\]/im) do
old_username, post_id, quote = $1, $2, $3 old_username, post_id, quote = $1, $2, $3
if @old_username_to_new_usernames.has_key?(old_username) new_username = get_username_for_old_username(old_username)
old_username = @old_username_to_new_usernames[old_username]
end # There is a bug here when the first post in a topic is quoted.
# The first post in a topic does not have a post_custom_field referring to the post number,
# but it refers to thread-XXX instead, so this lookup fails miserably in that case.
# Fixing this would imply rewriting that logic completely.
if topic_lookup = topic_lookup_from_imported_post_id(post_id) if topic_lookup = topic_lookup_from_imported_post_id(post_id)
post_number = topic_lookup[:post_number] post_number = topic_lookup[:post_number]
topic_id = topic_lookup[:topic_id] topic_id = topic_lookup[:topic_id]
"\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n" "\n[quote=\"#{new_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n"
else else
"\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" "\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n"
end end
end end