diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb index c3278b3c2b0..7291d158f61 100644 --- a/script/import_scripts/base.rb +++ b/script/import_scripts/base.rb @@ -37,8 +37,8 @@ class ImportScripts::Base @posts[import_id] = post_id end - Post.pluck(:id, :topic_id, :post_number).each do |p,t,n| - @topic_lookup[p] = {topic_id: t, post_number: n} + Post.pluck(:id, :topic_id, :post_number).each do |post_id,t,n| + @topic_lookup[post_id] = {topic_id: t, post_number: n} end end @@ -86,7 +86,7 @@ class ImportScripts::Base # Get the Discourse User id based on the id of the source user def user_id_from_imported_user_id(import_id) - @existing_users[import_id] || @existing_users[import_id.to_s] || find_user_by_import_id(import_id) + @existing_users[import_id] || @existing_users[import_id.to_s] || find_user_by_import_id(import_id).try(:id) end def find_user_by_import_id(import_id) @@ -116,12 +116,12 @@ class ImportScripts::Base # Required fields are :id and :email, where :id is the id of the # user in the original datasource. The given id will not be used to # create the Discourse user record. - def create_users(results) - puts "creating users" + def create_users(results, opts={}) num_users_before = User.count users_created = 0 users_skipped = 0 progress = 0 + total = opts[:total] || results.size results.each do |result| u = yield(result) @@ -143,12 +143,10 @@ class ImportScripts::Base puts "Skipping user id #{u[:id]} because email is blank" end - print_status users_created + users_skipped + @failed_users.length, results.size + print_status users_created + users_skipped + @failed_users.length + (opts[:offset] || 0), total end - puts '' - puts "created: #{User.count - num_users_before} users" - puts " failed: #{@failed_users.size}" if @failed_users.size > 0 + return [users_created, users_skipped] end def create_user(opts, import_id) @@ -194,11 +192,19 @@ class ImportScripts::Base # create the Discourse category record. # Optional attributes are position, description, and parent_category_id. def create_categories(results) - puts "creating categories" + puts "", "creating categories" results.each do |c| params = yield(c) puts " #{params[:name]}" + + # make sure categories don't go more than 2 levels deep + if params[:parent_category_id] + top = Category.find_by_id(params[:parent_category_id]) + top = top.parent_category while top && !top.parent_category.nil? + params[:parent_category_id] = top.id if top + end + new_category = create_category(params, params[:id]) @categories[params[:id]] = new_category end @@ -245,10 +251,16 @@ class ImportScripts::Base else begin new_post = create_post(params, import_id) - @posts[import_id] = new_post.id - @topic_lookup[new_post.id] = {post_number: new_post.post_number, topic_id: new_post.topic_id} + if new_post.is_a?(Post) + @posts[import_id] = new_post.id + @topic_lookup[new_post.id] = {post_number: new_post.post_number, topic_id: new_post.topic_id} - created += 1 + created += 1 + else + skipped += 1 + puts "Error creating post #{import_id}. Skipping." + puts new_post.inspect + end rescue => e skipped += 1 puts "Error creating post #{import_id}. Skipping." @@ -276,7 +288,9 @@ class ImportScripts::Base opts[:raw] = opts[:raw].bbcode_to_md rescue opts[:raw] end - PostCreator.create(user, opts) + post_creator = PostCreator.new(user, opts) + post = post_creator.create + post ? post : post_creator.errors.full_messages end def close_inactive_topics(opts={}) diff --git a/script/import_scripts/drupal.rb b/script/import_scripts/drupal.rb index 967bd8cf704..448dd1e768c 100644 --- a/script/import_scripts/drupal.rb +++ b/script/import_scripts/drupal.rb @@ -115,8 +115,7 @@ class ImportScripts::Drupal < ImportScripts::Base raw: row['body'], created_at: Time.zone.at(row['created']), pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil, - title: row['title'].try(:strip), - custom_fields: {import_id: "nid:#{row['nid']}"} + title: row['title'].try(:strip) } end end @@ -163,7 +162,6 @@ class ImportScripts::Drupal < ImportScripts::Base user_id: user_id_from_imported_user_id(row['uid']) || -1, raw: row['body'], created_at: Time.zone.at(row['created']), - custom_fields: {import_id: "cid:#{row['cid']}"} } if row['pid'] parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}") diff --git a/script/import_scripts/kunena.rb b/script/import_scripts/kunena.rb index 6d562054a2f..a4b4d436db3 100644 --- a/script/import_scripts/kunena.rb +++ b/script/import_scripts/kunena.rb @@ -35,6 +35,8 @@ class ImportScripts::Kunena < ImportScripts::Base parse_users + puts "creating users" + create_users(@users) do |id, user| { id: id, email: user[:email], @@ -42,6 +44,7 @@ class ImportScripts::Kunena < ImportScripts::Base created_at: user[:created_at], bio_raw: user[:bio], moderator: user[:moderator] ? true : false, + admin: user[:admin] ? true : false, suspended_at: user[:suspended] ? Time.zone.now : nil, suspended_till: user[:suspended] ? 100.years.from_now : nil } end @@ -135,10 +138,9 @@ class ImportScripts::Kunena < ImportScripts::Base mapped = {} mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['userid']) || find_user_by_import_id(m['userid']).try(:id) || -1 + mapped[:user_id] = user_id_from_imported_user_id(m['userid']) || -1 mapped[:raw] = m["message"] mapped[:created_at] = Time.zone.at(m['time']) - mapped[:custom_fields] = {import_id: m['id']} if m['id'] == m['thread'] mapped[:category] = category_from_imported_category_id(m['catid']).try(:name) diff --git a/script/import_scripts/phpbb3.rb b/script/import_scripts/phpbb3.rb new file mode 100644 index 00000000000..9e4758246cb --- /dev/null +++ b/script/import_scripts/phpbb3.rb @@ -0,0 +1,251 @@ +require File.expand_path(File.dirname(__FILE__) + "/base.rb") + +require "mysql2" + +class ImportScripts::PhpBB3 < ImportScripts::Base + + PHPBB_DB = "phpbb" + BATCH_SIZE = 1000 + + def initialize + super + + @client = Mysql2::Client.new( + host: "localhost", + username: "root", + #password: "password", + database: PHPBB_DB + ) + end + + def execute + import_users + import_categories + import_posts + import_private_messages + suspend_users + end + + def import_users + puts '', "creating users" + + total_count = mysql_query("SELECT count(*) count + FROM phpbb_users u + JOIN phpbb_groups g ON g.group_id = u.group_id + WHERE g.group_name != 'BOTS' + AND u.user_type != 1;").first['count'] + + batches(BATCH_SIZE) do |offset| + results = mysql_query( + "SELECT user_id id, user_email email, username, user_regdate, group_name + FROM phpbb_users u + JOIN phpbb_groups g ON g.group_id = u.group_id + WHERE g.group_name != 'BOTS' + AND u.user_type != 1 + ORDER BY u.user_id ASC + LIMIT #{BATCH_SIZE} + OFFSET #{offset};") + + break if results.size < 1 + + create_users(results, total: total_count, offset: offset) do |user| + { id: user['id'], + email: user['email'], + username: user['username'], + created_at: Time.zone.at(user['user_regdate']), + moderator: user['group_name'] == 'GLOBAL_MODERATORS', + admin: user['group_name'] == 'ADMINISTRATORS' } + end + end + end + + def import_categories + results = mysql_query(" + SELECT forum_id id, parent_id, forum_name name, forum_desc description + FROM phpbb_forums + ORDER BY parent_id ASC, forum_id ASC + ") + + create_categories(results) do |row| + h = {id: row['id'], name: CGI.unescapeHTML(row['name']), description: CGI.unescapeHTML(row['description'])} + if row['parent_id'].to_i > 0 + parent = category_from_imported_category_id(row['parent_id']) + h[:parent_category_id] = parent.id if parent + end + h + end + end + + def import_posts + puts "", "creating topics and posts" + + total_count = mysql_query("SELECT count(*) count from phpbb_posts").first["count"] + + batches(BATCH_SIZE) do |offset| + results = mysql_query(" + SELECT p.post_id id, + p.topic_id topic_id, + t.forum_id category_id, + t.topic_title title, + t.topic_first_post_id first_post_id, + p.poster_id user_id, + p.post_text raw, + p.post_time post_time + FROM phpbb_posts p, + phpbb_topics t + WHERE p.topic_id = t.topic_id + LIMIT #{BATCH_SIZE} + OFFSET #{offset}; + ") + + break if results.size < 1 + + create_posts(results, total: total_count, offset: offset) do |m| + skip = false + mapped = {} + + mapped[:id] = m['id'] + mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 + mapped[:raw] = decode_phpbb_post(m['raw']) + mapped[:created_at] = Time.zone.at(m['post_time']) + + if m['id'] == m['first_post_id'] + mapped[:category] = category_from_imported_category_id(m['category_id']).try(:name) + mapped[:title] = CGI.unescapeHTML(m['title']) + else + parent = topic_lookup_from_imported_post_id(m['first_post_id']) + if parent + mapped[:topic_id] = parent[:topic_id] + else + puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" + skip = true + end + end + + skip ? nil : mapped + end + end + end + + def import_private_messages + puts "", "creating private messages" + + total_count = mysql_query("SELECT count(*) count from phpbb_privmsgs").first["count"] + + batches(BATCH_SIZE) do |offset| + results = mysql_query(" + SELECT msg_id id, + root_level, + author_id user_id, + message_time, + message_subject, + message_text + FROM phpbb_privmsgs + ORDER BY root_level ASC, msg_id ASC + LIMIT #{BATCH_SIZE} + OFFSET #{offset}; + ") + + break if results.size < 1 + + create_posts(results, total: total_count, offset: offset) do |m| + skip = false + mapped = {} + + mapped[:id] = "pm:#{m['id']}" + mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 + mapped[:raw] = decode_phpbb_post(m['message_text']) + mapped[:created_at] = Time.zone.at(m['message_time']) + + if m['root_level'] == 0 + mapped[:title] = CGI.unescapeHTML(m['message_subject']) + mapped[:archetype] = Archetype.private_message + + # Find the users who are part of this private message. + # Found from the to_address of phpbb_privmsgs, by looking at + # all the rows with the same root_level. + # to_address looks like this: "u_91:u_1234:u_200" + # The "u_" prefix is discarded and the rest is a user_id. + + import_user_ids = mysql_query(" + SELECT to_address + FROM phpbb_privmsgs + WHERE msg_id = #{m['id']} + OR root_level = #{m['id']}").map { |r| r['to_address'].split(':') }.flatten!.map! { |u| u[2..-1] } + + mapped[:target_usernames] = import_user_ids.map! do |import_user_id| + import_user_id.to_s == m['user_id'].to_s ? nil : User.find_by_id(user_id_from_imported_user_id(import_user_id)).try(:username) + end.compact.uniq + + skip = true if mapped[:target_usernames].empty? # pm with yourself? + else + parent = topic_lookup_from_imported_post_id("pm:#{m['root_level']}") + if parent + mapped[:topic_id] = parent[:topic_id] + else + puts "Parent post pm:#{m['root_level']} doesn't exist. Skipping #{m["id"]}: #{m["message_subject"][0..40]}" + skip = true + end + end + + skip ? nil : mapped + end + end + end + + def suspend_users + puts '', "updating banned users" + + where = "ban_userid > 0 AND (ban_end = 0 OR ban_end > #{Time.zone.now.to_i})" + + banned = 0 + failed = 0 + total = mysql_query("SELECT count(*) count FROM phpbb_banlist WHERE #{where}").first['count'] + + system_user = Discourse.system_user + + mysql_query("SELECT ban_userid, ban_start, ban_end, ban_give_reason FROM phpbb_banlist WHERE #{where}").each do |b| + user = find_user_by_import_id(b['ban_userid']) + if user + user.suspended_at = Time.zone.at(b['ban_start']) + user.suspended_till = b['ban_end'] > 0 ? Time.zone.at(b['ban_end']) : 200.years.from_now + + if user.save + StaffActionLogger.new(system_user).log_user_suspend(user, b['ban_give_reason']) + banned += 1 + else + puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}" + failed += 1 + end + else + puts "Not found: #{b['ban_userid']}" + failed += 1 + end + + print_status banned + failed, total + end + end + + def mysql_query(sql) + @client.query(sql, cache_rows: false) + end + + def decode_phpbb_post(raw) + s = raw.dup + + # :) is encoded as :) + s.gsub!(/(?:.*)/, '\1') + + # Some links look like this: http://www.onegameamonth.com + s.gsub!(/(.+)<\/a>/, '[\2](\1)') + + # Many phpbb bbcode tags have a hash attached to them. Examples: + # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] + # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] + s.gsub!(/:(?:\w{8})\]/, ']') + + CGI.unescapeHTML(s) + end +end + +ImportScripts::PhpBB3.new.perform