Merge pull request #3978 from fantasticfears/discuz_x

Updating Discuz import script (Most work done by zh99998)
This commit is contained in:
Régis Hanol 2016-02-06 11:11:55 +01:00
commit 4b834253ef
1 changed file with 402 additions and 86 deletions

View File

@ -7,6 +7,8 @@
# This script is tested only on Simplified Chinese Discuz! X instances # This script is tested only on Simplified Chinese Discuz! X instances
# If you want to import data other than Simplified Chinese, email me. # If you want to import data other than Simplified Chinese, email me.
require 'php_serialize'
require 'miro'
require 'mysql2' require 'mysql2'
require File.expand_path(File.dirname(__FILE__) + "/base.rb") require File.expand_path(File.dirname(__FILE__) + "/base.rb")
@ -34,9 +36,23 @@ class ImportScripts::DiscuzX < ImportScripts::Base
database: DISCUZX_DB database: DISCUZX_DB
) )
@first_post_id_by_topic_id = {} @first_post_id_by_topic_id = {}
@internal_url_regexps = [
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=viewthread(?:&|&amp;)tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)/,
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/viewthread\.php\?tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)/,
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=redirect(?:&|&amp;)goto=findpost(?:&|&amp;)pid=(?<pid>\d+)(?:&|&amp;)ptid=(?<tid>\d+)(?:[^\[\]\s]*)/,
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/redirect\.php\?goto=findpost(?:&|&amp;)pid=(?<pid>\d+)(?:&|&amp;)ptid=(?<tid>\d+)(?:[^\[\]\s]*)/,
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forumdisplay\.php\?fid=(?<fid>\d+)(?:[^\[\]\s]*)/,
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=forumdisplay(?:&|&amp;)fid=(?<fid>\d+)(?:[^\[\]\s]*)/,
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/(?<action>index)\.php(?:[^\[\]\s]*)/,
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/(?<action>stats)\.php(?:[^\[\]\s]*)/,
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/misc.php\?mod=(?<mod>stat|ranklist)(?:[^\[\]\s]*)/
]
end end
def execute def execute
get_knowledge_about_duplicated_email
import_users import_users
import_categories import_categories
import_posts import_posts
@ -53,19 +69,53 @@ class ImportScripts::DiscuzX < ImportScripts::Base
def get_knowledge_about_group def get_knowledge_about_group
group_table = table_name 'common_usergroup' group_table = table_name 'common_usergroup'
result = mysql_query( result = mysql_query(
"SELECT groupid group_id, radminid role_id, type, grouptitle title "SELECT groupid group_id, radminid role_id
FROM #{group_table};") FROM #{group_table};")
@moderator_group_id = -1 @moderator_group_id = []
@admin_group_id = -1 @admin_group_id = []
#@banned_group_id = [4,5] # 禁止的用户及其帖子均不导入,如果你想导入这些用户和帖子,请把这个数组清空。
result.each do |group| result.each do |group|
role_id = group['role_id'] case group['role_id']
group_id = group['group_id'] when 1 # 管理员
case group['title'].strip @admin_group_id << group['group_id']
when '管理员' when 2, 3 # 超级版主、版主。如果你不希望原普通版主成为Discourse版主把3去掉。
@admin_admin_id = role_id @moderator_group_id << group['group_id']
when '超级版主' end
@moderator_admin_id = role_id end
end
# Builds @category_slug, a hash of category import id => slug.
# The data is read from the row of the common_setting table whose skey is
# 'forumkeys'; its value is PHP-unserialized. @category_slug is left empty
# when that row is missing or its value is blank.
def get_knowledge_about_category_slug
@category_slug = {}
results = mysql_query("SELECT svalue value
FROM #{table_name 'common_setting'}
WHERE skey = 'forumkeys'")
# No 'forumkeys' row stored: nothing to map.
return if results.size < 1
value = results.first['value']
return if value.blank?
# presumably unserializes to { forum id => slug } -- TODO confirm against real data
PHP.unserialize(value).each do |category_import_id, slug|
# Entries with a blank slug are not recorded.
next if slug.blank?
@category_slug[category_import_id] = slug
end
end
def get_knowledge_about_duplicated_email
@duplicated_email = {}
results = mysql_query(
"select a.uid uid, b.uid import_id from pre_common_member a
join (select uid, email from pre_common_member group by email having count(email) > 1 order by uid asc) b USING(email)
where a.uid != b.uid")
users = @lookup.instance_variable_get :@users
results.each do |row|
@duplicated_email[row['uid']] = row['import_id']
user_id = users[row['import_id']]
if user_id
users[row['uid']] = user_id
end end
end end
end end
@ -79,51 +129,63 @@ class ImportScripts::DiscuzX < ImportScripts::Base
user_table = table_name 'common_member' user_table = table_name 'common_member'
profile_table = table_name 'common_member_profile' profile_table = table_name 'common_member_profile'
status_table = table_name 'common_member_status' status_table = table_name 'common_member_status'
forum_table = table_name 'common_member_field_forum'
home_table = table_name 'common_member_field_home'
total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first['count'] total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first['count']
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
results = mysql_query( results = mysql_query(
"SELECT u.uid id, u.username username, u.email email, u.adminid admin_id, su.regdate regdate, s.regip regip, "SELECT u.uid id, u.username username, u.email email, u.groupid group_id,
u.emailstatus email_confirmed, u.avatarstatus avatar_exists, p.site website, p.resideprovince province, su.regdate regdate, su.password password_hash, su.salt salt,
p.residecity city, p.residedist country, p.residecommunity community, p.residesuite apartment, s.regip regip, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at, s.lastsendmail last_emailed_at,
p.bio bio, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at, u.emailstatus email_confirmed, u.avatarstatus avatar_exists,
s.lastsendmail last_emailed_at p.site website, p.address address, p.bio bio, p.realname realname, p.qq qq,
p.resideprovince resideprovince, p.residecity residecity, p.residedist residedist, p.residecommunity residecommunity,
p.resideprovince birthprovince, p.birthcity birthcity, p.birthdist birthdist, p.birthcommunity birthcommunity,
h.spacecss spacecss, h.spacenote spacenote,
f.customstatus customstatus, f.sightml sightml
FROM #{user_table} u FROM #{user_table} u
JOIN #{sensitive_user_table} su ON su.uid = u.uid LEFT JOIN #{sensitive_user_table} su USING(uid)
JOIN #{profile_table} p ON p.uid = u.uid LEFT JOIN #{profile_table} p USING(uid)
JOIN #{status_table} s ON s.uid = u.uid LEFT JOIN #{status_table} s USING(uid)
LEFT JOIN #{forum_table} f USING(uid)
LEFT JOIN #{home_table} h USING(uid)
ORDER BY u.uid ASC ORDER BY u.uid ASC
LIMIT #{BATCH_SIZE} LIMIT #{BATCH_SIZE}
OFFSET #{offset};") OFFSET #{offset};")
break if results.size < 1 break if results.size < 1
next if all_records_exist? :users, users.map {|u| u["id"].to_i} # TODO: breaks the script, as reported by some users
# next if all_records_exist? :users, users.map {|u| u["id"].to_i}
create_users(results, total: total_count, offset: offset) do |user| create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'], { id: user['id'],
email: user['email'], email: user['email'],
username: user['username'], username: user['username'],
name: user['username'], name: first_exists(user['realname'], user['customstatus'], user['username']),
created_at: Time.zone.at(user['regdate']), import_pass: user['password_hash'],
active: true,
salt: user['salt'],
# TODO: title: user['customstatus'], # the custom title is moved into the name because Discourse does not let users set their own title: https://meta.discourse.org/t/let-users-custom-their-title/37626
created_at: user['regdate'] ? Time.zone.at(user['regdate']) : nil,
registration_ip_address: user['regip'], registration_ip_address: user['regip'],
ip_address: user['last_visit_ip'], ip_address: user['last_visit_ip'],
last_seen_at: user['last_visit_time'], last_seen_at: user['last_visit_time'],
last_emailed_at: user['last_emailed_at'], last_emailed_at: user['last_emailed_at'],
last_posted_at: user['last_posted_at'], last_posted_at: user['last_posted_at'],
moderator: user['admin_id'] == @moderator_admin_id, moderator: @moderator_group_id.include?(user['group_id']),
admin: user['admin_id'] == @admin_admin_id, admin: @admin_group_id.include?(user['group_id']),
active: true, website: (user['website'] and user['website'].include?('.')) ? user['website'].strip : ( user['qq'] and user['qq'].strip == user['qq'].strip.to_i and user['qq'].strip.to_i > 10000 ) ? 'http://user.qzone.qq.com/' + user['qq'].strip : nil,
website: user['website'], bio_raw: first_exists((user['bio'] and CGI.unescapeHTML(user['bio'])), user['sightml'], user['spacenote']).strip[0,3000],
bio_raw: user['bio'], location: first_exists(user['address'], (!user['resideprovince'].blank? ? [user['resideprovince'], user['residecity'], user['residedist'], user['residecommunity']] : [user['birthprovince'], user['birthcity'], user['birthdist'], user['birthcommunity']]).reject{|location|location.blank?}.join(' ')),
location: "#{user['province']}#{user['city']}#{user['country']}#{user['community']}#{user['apartment']}",
post_create_action: lambda do |newmember| post_create_action: lambda do |newmember|
if user['avatar_exists'] == 1 and newmember.uploaded_avatar_id.blank? if user['avatar_exists'] == 1 and newmember.uploaded_avatar_id.blank?
path, filename = discuzx_avatar_fullpath(user['id']) path, filename = discuzx_avatar_fullpath(user['id'])
if path if path
begin begin
upload = create_upload(newmember.id, path, filename) upload = create_upload(newmember.id, path, filename)
if upload.persisted? if !upload.nil? && upload.persisted?
newmember.import_mode = false newmember.import_mode = false
newmember.create_user_avatar newmember.create_user_avatar
newmember.import_mode = true newmember.import_mode = true
@ -137,9 +199,42 @@ class ImportScripts::DiscuzX < ImportScripts::Base
end end
end end
end end
if !user['spacecss'].blank? and newmember.user_profile.profile_background.blank?
# profile background
if matched = user['spacecss'].match(/body\s*{[^}]*url\('?(.+?)'?\)/i)
body_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
end
if matched = user['spacecss'].match(/#hd\s*{[^}]*url\('?(.+?)'?\)/i)
header_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
end
if matched = user['spacecss'].match(/.blocktitle\s*{[^}]*url\('?(.+?)'?\)/i)
blocktitle_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
end
if matched = user['spacecss'].match(/#ct\s*{[^}]*url\('?(.+?)'?\)/i)
content_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
end
if body_background || header_background || blocktitle_background || content_background
profile_background = first_exists(header_background, body_background, content_background, blocktitle_background)
card_background = first_exists(content_background, body_background, header_background, blocktitle_background)
upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, profile_background), File.basename(profile_background))
if upload
newmember.user_profile.upload_profile_background upload
else
puts "WARNING: #{user['username']} (UID: #{user['id']}) profile_background file did not persist!"
end
upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, card_background), File.basename(card_background))
if upload
newmember.user_profile.upload_card_background upload
else
puts "WARNING: #{user['username']} (UID: #{user['id']}) card_background file did not persist!"
end
end
end
# we don't send email to the unconfirmed user # we don't send email to the unconfirmed user
newmember.update(email_digests: user['email_confirmed'] == 1) if newmember.email_digests newmember.update(email_digests: user['email_confirmed'] == 1) if newmember.email_digests
newmember.update(name: '') if !newmember.name.blank? and newmember.name == newmember.username
end end
} }
end end
@ -149,27 +244,57 @@ class ImportScripts::DiscuzX < ImportScripts::Base
def import_categories def import_categories
puts '', "creating categories" puts '', "creating categories"
get_knowledge_about_category_slug
forums_table = table_name 'forum_forum' forums_table = table_name 'forum_forum'
forums_data_table = table_name 'forum_forumfield' forums_data_table = table_name 'forum_forumfield'
results = mysql_query(" results = mysql_query("
SELECT f.fid id, f.fup parent_id, f.name, f.type type, f.status status, f.displayorder position, SELECT f.fid id, f.fup parent_id, f.name, f.type type, f.status status, f.displayorder position,
d.description description d.description description, d.rules rules, d.icon, d.extra extra
FROM #{forums_table} f FROM #{forums_table} f
JOIN #{forums_data_table} d ON f.fid = d.fid LEFT JOIN #{forums_data_table} d USING(fid)
ORDER BY parent_id ASC, id ASC ORDER BY parent_id ASC, id ASC
") ")
max_position = Category.all.max_by(&:position).position max_position = Category.all.max_by(&:position).position
create_categories(results) do |row| create_categories(results) do |row|
next if row['type'] == 'group' || row['status'].to_i == 3 next if row['type'] == 'group' or row['status'] == 2 # or row['status'].to_i == 3 # 如果不想导入群组,取消注释
extra = PHP.unserialize(row['extra']) if !row['extra'].blank?
if extra and !extra["namecolor"].blank?
color = extra["namecolor"][1,6]
end
Category.all.max_by(&:position).position Category.all.max_by(&:position).position
h = { h = {
id: row['id'], id: row['id'],
name: row['name'], name: row['name'],
description: row['description'], description: row['description'],
position: row['position'].to_i + max_position position: row['position'].to_i + max_position,
color: color,
suppress_from_homepage: (row['status'] == 0 or row['status'] == 3),
post_create_action: lambda do |category|
if slug = @category_slug[row['id']]
category.update(slug: slug)
end
raw = process_discuzx_post(row['rules'], nil)
if @bbcode_to_md
raw = raw.bbcode_to_md(false) rescue raw
end
category.topic.posts.first.update_attribute(:raw, raw)
if !row['icon'].empty?
upload = create_upload(Discourse::SYSTEM_USER_ID, File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, '../common', row['icon']), File.basename(row['icon']))
if upload
category.logo_url = upload.url
# FIXME: I don't know how to get '/shared' by script. May change to Rails.root
category.color = Miro::DominantColors.new(File.join('/shared', category.logo_url)).to_hex.first[1,6] if !color
category.save!
end
end
category
end
} }
if row['parent_id'].to_i > 0 if row['parent_id'].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id']) h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id'])
@ -181,6 +306,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base
def import_posts def import_posts
puts "", "creating topics and posts" puts "", "creating topics and posts"
users_table = table_name 'common_member'
posts_table = table_name 'forum_post' posts_table = table_name 'forum_post'
topics_table = table_name 'forum_thread' topics_table = table_name 'forum_thread'
@ -195,16 +321,18 @@ class ImportScripts::DiscuzX < ImportScripts::Base
p.authorid user_id, p.authorid user_id,
p.message raw, p.message raw,
p.dateline post_time, p.dateline post_time,
p.first is_first_post, p2.pid first_id,
p.invisible status p.invisible status,
FROM #{posts_table} p, t.special special
#{topics_table} t FROM #{posts_table} p
WHERE p.tid = t.tid JOIN #{posts_table} p2 ON p2.first AND p2.tid = p.tid
JOIN #{topics_table} t ON t.tid = p.tid
where t.tid < 10000
ORDER BY id ASC, topic_id ASC ORDER BY id ASC, topic_id ASC
LIMIT #{BATCH_SIZE} LIMIT #{BATCH_SIZE}
OFFSET #{offset}; OFFSET #{offset};
") ")
# u.status != -1 AND u.groupid != 4 AND u.groupid != 5 用户未被锁定、禁访或禁言。在现实中的 Discuz 论坛,禁止的用户通常是广告机或驱逐的用户,这些不需要导入。
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| p["id"].to_i} next if all_records_exist? :posts, results.map {|p| p["id"].to_i}
@ -218,40 +346,95 @@ class ImportScripts::DiscuzX < ImportScripts::Base
mapped[:raw] = process_discuzx_post(m['raw'], m['id']) mapped[:raw] = process_discuzx_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['post_time']) mapped[:created_at] = Time.zone.at(m['post_time'])
if m['is_first_post'] == 1 if m['id'] == m['first_id']
mapped[:category] = category_id_from_imported_category_id(m['category_id']) mapped[:category] = category_id_from_imported_category_id(m['category_id'])
mapped[:title] = CGI.unescapeHTML(m['title']) mapped[:title] = CGI.unescapeHTML(m['title'])
@first_post_id_by_topic_id[m['topic_id']] = m['id']
if m['special'] == 1
results = mysql_query("
SELECT multiple, maxchoices
FROM #{table_name 'forum_poll'}
WHERE tid = #{m['topic_id']}")
poll = results.first || {}
results = mysql_query("
SELECT polloption
FROM #{table_name 'forum_polloption'}
WHERE tid = #{m['topic_id']}
ORDER BY displayorder")
if results.empty?
puts "WARNING: can't find poll options for topic #{m['topic_id']}, skip poll"
else
mapped[:raw].prepend "[poll#{poll['multiple'] ? ' type=multiple' : ''}#{poll['maxchoices'] > 0 ? " max=#{poll['maxchoices']}" : ''}]\n#{results.map{|option|'- ' + option['polloption']}.join("\n")}\n[/poll]\n"
end
end
else else
parent = topic_lookup_from_imported_post_id(@first_post_id_by_topic_id[m['topic_id']]) parent = topic_lookup_from_imported_post_id(m['first_id'])
if parent if parent
mapped[:topic_id] = parent[:topic_id] mapped[:topic_id] = parent[:topic_id]
post_id = post_id_from_imported_post_id(find_post_id_by_quote_number(m['raw']).to_i) reply_post_import_id = find_post_id_by_quote_number(m['raw'])
if (post = Post.find_by(id: post_id)) if reply_post_import_id
mapped[:reply_to_post_number] = post.post_number post_id = post_id_from_imported_post_id(reply_post_import_id.to_i)
if (post = Post.find_by(id: post_id))
if post.topic_id == mapped[:topic_id]
mapped[:reply_to_post_number] = post.post_number
else
puts "post #{m['id']} reply to another topic, skip reply"
end
else
puts "post #{m['id']} reply to not exists post #{reply_post_import_id}, skip reply"
end
end end
else else
puts "Parent topic #{m['topic_id']} doesn't exist. Skipping #{m['id']}: #{m['title'][0..40]}" puts "Parent topic #{m['topic_id']} doesn't exist. Skipping #{m['id']}: #{m['title'][0..40]}"
skip = true skip = true
end end
end end
if [-5, -3, -1].include? m['status'] || mapped[:raw].blank? if m['status'] & 1 == 1 || mapped[:raw].blank?
mapped[:post_create_action] = lambda do |post| mapped[:post_create_action] = lambda do |post|
PostDestroyer.new(Discourse.system_user, post).perform_delete PostDestroyer.new(Discourse.system_user, post).perform_delete
end end
elsif m['status'] == -2# waiting for approve elsif (m['status'] & 2) >> 1 == 1 # waiting for approve
mapped[:post_create_action] = lambda do |post| mapped[:post_create_action] = lambda do |post|
PostAction.act(Discourse.system_user, post, 6, {take_action: false}) PostAction.act(Discourse.system_user, post, 6, {take_action: false})
end end
end end
skip ? nil : mapped skip ? nil : mapped
end end
end end
end end
# Creates bookmarks from Discuz thread favorites.
#
# Only favorites whose idtype is 'tid' are read. Each favorite is joined to the
# post of the favorited thread that has `first` set, and every resulting row is
# passed to create_bookmarks as { user_id:, post_id: } built from that row.
# Rows are fetched in pages of BATCH_SIZE until a page comes back empty.
def import_bookmarks
  puts '', 'creating bookmarks'

  favorites_table = table_name 'home_favorite'
  posts_table = table_name 'forum_post'

  total_count = mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first['count']

  batches(BATCH_SIZE) do |offset|
    # ORDER BY is required here: without it MySQL does not guarantee the same
    # row order from one query to the next, so LIMIT/OFFSET paging could skip
    # some favorites and return others twice.
    results = mysql_query("
      SELECT p.pid post_id, f.uid user_id
      FROM #{favorites_table} f
      JOIN #{posts_table} p ON f.id = p.tid
      WHERE f.idtype = 'tid' AND p.first
      ORDER BY f.uid ASC, p.pid ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};")

    # An empty page means every favorite has been processed.
    break if results.size < 1

    # next if all_records_exist?

    create_bookmarks(results, total: total_count, offset: offset) do |row|
      {
        user_id: row['user_id'],
        post_id: row['post_id']
      }
    end
  end
end
def import_private_messages def import_private_messages
puts '', 'creating private messages' puts '', 'creating private messages'
@ -285,7 +468,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"} # next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}
create_posts(results, total: total_count, offset: offset) do |m| create_posts(results, total: total_count, offset: offset) do |m|
skip = false skip = false
@ -349,8 +532,9 @@ class ImportScripts::DiscuzX < ImportScripts::Base
result.first['id'].to_s == pm_id.to_s result.first['id'].to_s == pm_id.to_s
end end
def process_discuzx_post(raw, import_id) def process_and_upload_inline_images(raw)
inline_image_regex = /\[img\]([\s\S]*?)\[\/img\]/ inline_image_regex = /\[img\]([\s\S]*?)\[\/img\]/
s = raw.dup s = raw.dup
s.gsub!(inline_image_regex) do |d| s.gsub!(inline_image_regex) do |d|
@ -361,14 +545,65 @@ class ImportScripts::DiscuzX < ImportScripts::Base
upload ? html_for_upload(upload, filename) : nil upload ? html_for_upload(upload, filename) : nil
end end
end
def process_discuzx_post(raw, import_id)
# raw = process_and_upload_inline_images(raw)
s = raw.dup
# Strip the quote # Strip the quote
# [quote] quotation includes the topic which is the same as reply to in Discourse # [quote] quotation includes the topic which is the same as reply to in Discourse
# We get the pid to find the post number the post reply to. So it can be stripped # We get the pid to find the post number the post reply to. So it can be stripped
s = s.gsub(/\[quote\][\s\S]*?\[\/quote\]/i, '').strip
s = s.gsub(/\[b\]回复 \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[\/url\]\[\/b\]/i, '').strip s = s.gsub(/\[b\]回复 \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[\/url\]\[\/b\]/i, '').strip
s = s.gsub(/\[b\]回复 \[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\].*?\[\/b\]/i, '').strip
# Convert image bbcode s.gsub!(/\[quote\](.*)?\[\/quote\]/im) do |matched|
content = $1
post_import_id = find_post_id_by_quote_number(content)
if post_import_id
post_id = post_id_from_imported_post_id(post_import_id.to_i)
if (post = Post.find_by(id: post_id))
"[quote=\"#{post.user.username}\", post: #{post.post_number}, topic: #{post.topic_id}]\n#{content}\n[/quote]"
else
puts "post #{import_id} quote to not exists post #{post_import_id}, skip reply"
matched[0]
end
else
matched[0]
end
end
s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')
s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')
# convert quote
s.gsub!(/\[quote\](.*?)\[\/quote\]/m) { "\n" + ($1.strip).gsub(/^/, '> ') + "\n" }
# truncate line space, preventing line starting with many blanks to be parsed as code blocks
s.gsub!(/^ {4,}/, ' ')
# TODO: Much better to use bbcode-to-md gem
# Convert image bbcode with width and height
s.gsub!(/\[img[^\]]*\]https?:\/\/#{ORIGINAL_SITE_PREFIX}\/(.*)\[\/img\]/i, '[x-attach]\1[/x-attach]') # dont convert attachment
s.gsub!(/<img[^>]*src="https?:\/\/#{ORIGINAL_SITE_PREFIX}\/(.*)".*?>/i, '[x-attach]\1[/x-attach]') # dont convert attachment
s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/blog\/(.*)\[\/img\]/i, '[x-attach]../blog/\1[/x-attach]') # 私货
s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/ucenter\/avatar.php\?uid=(\d+)[^\]]*\[\/img\]/i) { "[x-attach]#{discuzx_avatar_fullpath($1,false)[0]}[/x-attach]" } # 私货
s.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">') s.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')
s.gsub!(/\[img\]([^\]]*)\[\/img\]/i, '<img src="\1">')
s.gsub!(/\[qq\]([^\]]*)\[\/qq\]/i, '<a href="http://wpa.qq.com/msgrd?V=3&Uin=\1&Site=[Discuz!]&from=discuz&Menu=yes" target="_blank"><!--<img src="static/image/common/qq_big.gif" border="0">-->QQ 交谈</a>')
s.gsub!(/\[email\]([^\]]*)\[\/email\]/i, '[url=mailto:\1]\1[/url]') # bbcode-to-md can convert it
s.gsub!(/\[s\]([^\]]*)\[\/s\]/i, '<s>\1</s>')
s.gsub!(/\[sup\]([^\]]*)\[\/sup\]/i, '<sup>\1</sup>')
s.gsub!(/\[sub\]([^\]]*)\[\/sub\]/i, '<sub>\1</sub>')
s.gsub!(/\[hr\]/i, "\n---\n")
# remove the media tag
s.gsub!(/\[\/?media[^\]]*\]/i, "\n")
s.gsub!(/\[\/?flash[^\]]*\]/i, "\n")
s.gsub!(/\[\/?audio[^\]]*\]/i, "\n")
s.gsub!(/\[\/?video[^\]]*\]/i, "\n")
# Remove the font, p and backcolor tag # Remove the font, p and backcolor tag
# Discourse doesn't support the font tag # Discourse doesn't support the font tag
@ -390,11 +625,14 @@ class ImportScripts::DiscuzX < ImportScripts::Base
# Remove the hide tag # Remove the hide tag
s.gsub!(/\[\/?hide\]/i, '') s.gsub!(/\[\/?hide\]/i, '')
s.gsub!(/\[\/?free[^\]]*\]/i, "\n")
# Remove the align tag # Remove the align tag
# still don't know what it is # still don't know what it is
s.gsub!(/\[align=[^\]]*?\]/i, '') s.gsub!(/\[align=[^\]]*?\]/i, "\n")
s.gsub!(/\[\/align\]/i, "\n") s.gsub!(/\[\/align\]/i, "\n")
s.gsub!(/\[float=[^\]]*?\]/i, "\n")
s.gsub!(/\[\/float\]/i, "\n")
# Convert code # Convert code
s.gsub!(/\[\/?code\]/i, "\n```\n") s.gsub!(/\[\/?code\]/i, "\n```\n")
@ -424,39 +662,65 @@ class ImportScripts::DiscuzX < ImportScripts::Base
# [url][b]text[/b][/url] to **[url]text[/url]** # [url][b]text[/b][/url] to **[url]text[/url]**
s.gsub!(/(\[url=[^\[\]]*?\])\[b\](\S*)\[\/b\](\[\/url\])/, '**\1\2\3**') s.gsub!(/(\[url=[^\[\]]*?\])\[b\](\S*)\[\/b\](\[\/url\])/, '**\1\2\3**')
s.gsub!(internal_url_regexp) do |discuzx_link| @internal_url_regexps.each do |internal_url_regexp|
replace_internal_link(discuzx_link, $1) s.gsub!(internal_url_regexp) do |discuzx_link|
replace_internal_link(discuzx_link, ($~[:tid].to_i rescue nil), ($~[:pid].to_i rescue nil), ($~[:fid].to_i rescue nil), ($~[:action] rescue nil))
end
end end
# @someone without the url # @someone without the url
s.gsub!(/@\[url=[^\[\]]*?\](\S*)\[\/url\]/i, '@\1') s.gsub!(/@\[url=[^\[\]]*?\](\S*)\[\/url\]/i, '@\1')
s.scan(/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/[^\[\]\s]*/) {|link|puts "WARNING: post #{import_id} can't replace internal url #{link}"}
s.strip s.strip
end end
def replace_internal_link(discuzx_link, import_topic_id) def replace_internal_link(discuzx_link, import_topic_id, import_post_id, import_category_id, action)
results = mysql_query("SELECT pid if import_post_id
FROM #{table_name 'forum_post'} post_id = post_id_from_imported_post_id import_post_id
WHERE tid = #{import_topic_id} if post_id
ORDER BY pid ASC post = Post.find post_id
LIMIT 1") return post.full_url if post
end
return discuzx_link unless results.size > 0
linked_topic_id = results.first['pid']
lookup = topic_lookup_from_imported_post_id(linked_topic_id)
return discuzx_link unless lookup
if (t = Topic.find_by(id: lookup[:topic_id]))
"#{NEW_SITE_PREFIX}/t/#{t.slug}/#{t.id}"
else
discuzx_link
end end
end
def internal_url_regexp if import_topic_id
@internal_url_regexp ||= Regexp.new("http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}/forum\\.php\\?mod=viewthread&tid=(\\d+)(?:[^\\]\\[]*)")
results = mysql_query("SELECT pid
FROM #{table_name 'forum_post'}
WHERE tid = #{import_topic_id} AND first
LIMIT 1")
return discuzx_link unless results.size > 0
linked_post_id = results.first['pid']
lookup = topic_lookup_from_imported_post_id(linked_post_id)
if lookup
return "#{NEW_SITE_PREFIX}#{lookup[:url]}"
else
return discuzx_link
end
end
if import_category_id
category_id = category_id_from_imported_category_id import_category_id
if category_id
category = Category.find category_id
return category.url if category
end
end
case action
when 'index'
return "#{NEW_SITE_PREFIX}/"
when 'stat', 'stats', 'ranklist'
return "#{NEW_SITE_PREFIX}/users"
end
discuzx_link
end end
def pm_url_regexp def pm_url_regexp
@ -470,8 +734,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base
SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions
attachment_regex = /\[attach\](\d+)\[\/attach\]/ attachment_regex = /\[attach\](\d+)\[\/attach\]/
attachment_link_regex = /\[x-attach\](.+)\[\/x-attach\]/
user = Discourse.system_user
current_count = 0 current_count = 0
total_count = mysql_query("SELECT count(*) count FROM #{table_name 'forum_post'};").first['count'] total_count = mysql_query("SELECT count(*) count FROM #{table_name 'forum_post'};").first['count']
@ -482,13 +745,20 @@ class ImportScripts::DiscuzX < ImportScripts::Base
puts '', "Importing attachments...", '' puts '', "Importing attachments...", ''
Post.find_each do |post| Post.find_each do |post|
next unless post.custom_fields['import_id'] == post.custom_fields['import_id'].to_i.to_s
user = post.user
current_count += 1 current_count += 1
print_status current_count, total_count print_status current_count, total_count
new_raw = post.raw.dup new_raw = post.raw.dup
inline_attachments = []
new_raw.gsub!(attachment_regex) do |s| new_raw.gsub!(attachment_regex) do |s|
matches = attachment_regex.match(s) attachment_id = $1.to_i
attachment_id = matches[1] inline_attachments.push attachment_id
upload, filename = find_upload(user, post, attachment_id) upload, filename = find_upload(user, post, attachment_id)
unless upload unless upload
@ -498,6 +768,41 @@ class ImportScripts::DiscuzX < ImportScripts::Base
html_for_upload(upload, filename) html_for_upload(upload, filename)
end end
new_raw.gsub!(attachment_link_regex) do |s|
attachment_file = $1
filename = File.basename(attachment_file)
upload = create_upload(user.id, File.join(DISCUZX_BASE_DIR, attachment_file), filename)
unless upload
fail_count += 1
next
end
html_for_upload(upload, filename)
end
sql = "SELECT aid
FROM #{table_name 'forum_attachment'}
WHERE pid = #{post.custom_fields['import_id']}"
if !inline_attachments.empty?
sql << " AND aid NOT IN (#{inline_attachments.join(',')})"
end
results = mysql_query(sql)
results.each do |attachment|
attachment_id = attachment['aid']
upload, filename = find_upload(user, post, attachment_id)
unless upload
fail_count += 1
next
end
html = html_for_upload(upload, filename)
unless new_raw.include? html
new_raw << "\n"
new_raw << html
end
end
if new_raw != post.raw if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: '从 Discuz 中导入附件' }) PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: '从 Discuz 中导入附件' })
@ -513,7 +818,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base
end end
# Create the full path to the discuz avatar specified from user id # Create the full path to the discuz avatar specified from user id
def discuzx_avatar_fullpath(user_id) def discuzx_avatar_fullpath(user_id, absolute=true)
padded_id = user_id.to_s.rjust(9, '0') padded_id = user_id.to_s.rjust(9, '0')
part_1 = padded_id[0..2] part_1 = padded_id[0..2]
@ -522,16 +827,23 @@ class ImportScripts::DiscuzX < ImportScripts::Base
part_4 = padded_id[-2..-1] part_4 = padded_id[-2..-1]
file_name = "#{part_4}_avatar_big.jpg" file_name = "#{part_4}_avatar_big.jpg"
return File.join(DISCUZX_BASE_DIR, AVATAR_DIR, part_1, part_2, part_3, file_name), file_name if absolute
return File.join(DISCUZX_BASE_DIR, AVATAR_DIR, part_1, part_2, part_3, file_name), file_name
else
return File.join(AVATAR_DIR, part_1, part_2, part_3, file_name), file_name
end
end end
# post id is in the quote block # post id is in the quote block
def find_post_id_by_quote_number(raw) def find_post_id_by_quote_number(raw)
s = raw.dup case raw
quote_reply = s.match(/\[quote\][\S\s]*pid=(\d+)[\S\s]*\[\/quote\]/) when /\[url=forum.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+\]/ #standard
reply = s.match(/url=forum.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+/) $1
when /\[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=(\d+)&ptid=\d+\]/ # old discuz 7 format
quote_reply ? quote_reply[1] : (reply ? reply[1] : nil) $1
when /\[quote\][\S\s]*pid=(\d+)[\S\s]*\[\/quote\]/ # quote
$1
end
end end
# for some reason, discuz inlined some png file # for some reason, discuz inlined some png file
@ -632,6 +944,10 @@ class ImportScripts::DiscuzX < ImportScripts::Base
return nil return nil
end end
# Returns the first argument that is not blank, in argument order.
# Falls back to an empty string when every argument is blank or none are given.
def first_exists(*items)
  items.each do |candidate|
    return candidate unless candidate.blank?
  end
  ''
end
def mysql_query(sql) def mysql_query(sql)
@client.query(sql, cache_rows: false) @client.query(sql, cache_rows: false)
end end