diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb index 093875493cd..8a3d30225eb 100644 --- a/script/import_scripts/base.rb +++ b/script/import_scripts/base.rb @@ -7,13 +7,13 @@ if ARGV.include?('bbcode-to-md') # git clone https://github.com/nlalonde/ruby-bbcode-to-md.git # cd ruby-bbcode-to-md # gem build ruby-bbcode-to-md.gemspec - # gem install ruby-bbcode-to-md-0.0.13.gem + # gem install ruby-bbcode-to-md-*.gem require 'ruby-bbcode-to-md' end require_relative '../../config/environment' -require_dependency 'url_helper' -require_dependency 'file_helper' +require_relative 'base/lookup_container' +require_relative 'base/uploader' module ImportScripts; end @@ -24,46 +24,13 @@ class ImportScripts::Base def initialize preload_i18n - @bbcode_to_md = true if ARGV.include?('bbcode-to-md') - @existing_groups = {} - @failed_groups = [] - @existing_users = {} - @failed_users = [] - @categories_lookup = {} - @existing_posts = {} - @topic_lookup = {} - @site_settings_during_import = nil + @lookup = ImportScripts::LookupContainer.new + @uploader = ImportScripts::Uploader.new + + @bbcode_to_md = true if use_bbcode_to_md? + @site_settings_during_import = {} @old_site_settings = {} - @start_time = Time.now - - puts "loading existing groups..." - GroupCustomField.where(name: 'import_id').pluck(:group_id, :value).each do |group_id, import_id| - @existing_groups[import_id] = group_id - end - - puts "loading existing users..." - UserCustomField.where(name: 'import_id').pluck(:user_id, :value).each do |user_id, import_id| - @existing_users[import_id] = user_id - end - - puts "loading existing categories..." - CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value).each do |category_id, import_id| - @categories_lookup[import_id] = category_id - end - - puts "loading existing posts..." 
- PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id| - @existing_posts[import_id] = post_id - end - - puts "loading existing topics..." - Post.joins(:topic).pluck("posts.id, posts.topic_id, posts.post_number, topics.slug").each do |p| - @topic_lookup[p[0]] = { - topic_id: p[1], - post_number: p[2], - url: Post.url(p[3], p[1], p[2]), - } - end + @start_times = {import: Time.now} end def preload_i18n @@ -87,15 +54,15 @@ class ImportScripts::Base update_topic_count_replies reset_topic_counters - elapsed = Time.now - @start_time - puts '', "Done (#{elapsed.to_s} seconds)" + elapsed = Time.now - @start_times[:import] + puts '', '', 'Done (%02dh %02dmin %02dsec)' % [elapsed/3600, elapsed/60%60, elapsed%60] ensure reset_site_settings end - def change_site_settings - @site_settings_during_import = { + def get_site_settings_for_import + { email_domains_blacklist: '', min_topic_title_length: 1, min_post_length: 1, @@ -106,6 +73,10 @@ class ImportScripts::Base disable_emails: true, authorized_extensions: '*' } + end + + def change_site_settings + @site_settings_during_import = get_site_settings_for_import @site_settings_during_import.each do |key, value| @old_site_settings[key] = SiteSetting.send(key) @@ -124,44 +95,42 @@ class ImportScripts::Base RateLimiter.enable end + def use_bbcode_to_md? + ARGV.include?("bbcode-to-md") + end + # Implementation will do most of its work in its execute method. # It will need to call create_users, create_categories, and create_posts. def execute raise NotImplementedError end - # Get the Discourse Post id based on the id of the source record def post_id_from_imported_post_id(import_id) - @existing_posts[import_id] || @existing_posts[import_id.to_s] + @lookup.post_id_from_imported_post_id(import_id) end - # Get the Discourse topic info (a hash) based on the id of the source record def topic_lookup_from_imported_post_id(import_id) - post_id = post_id_from_imported_post_id(import_id) - post_id ? 
@topic_lookup[post_id] : nil + @lookup.topic_lookup_from_imported_post_id(import_id) end - # Get the Discourse Group id based on the id of the source group def group_id_from_imported_group_id(import_id) - @existing_groups[import_id] || @existing_groups[import_id.to_s] || find_group_by_import_id(import_id).try(:id) + @lookup.group_id_from_imported_group_id(import_id) end def find_group_by_import_id(import_id) - GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group) + @lookup.find_group_by_import_id(import_id) end - # Get the Discourse User id based on the id of the source user def user_id_from_imported_user_id(import_id) - @existing_users[import_id] || @existing_users[import_id.to_s] || find_user_by_import_id(import_id).try(:id) + @lookup.user_id_from_imported_user_id(import_id) end def find_user_by_import_id(import_id) - UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user) + @lookup.find_user_by_import_id(import_id) end - # Get the Discourse Category id based on the id of the source category def category_id_from_imported_category_id(import_id) - @categories_lookup[import_id] || @categories_lookup[import_id.to_s] + @lookup.category_id_from_imported_category_id(import_id) end def create_admin(opts={}) @@ -183,31 +152,32 @@ class ImportScripts::Base # group in the original datasource. The given id will not be used # to create the Discourse group record. def create_groups(results, opts={}) - groups_created = 0 - groups_skipped = 0 + created = 0 + skipped = 0 + failed = 0 total = opts[:total] || results.size results.each do |result| g = yield(result) - if group_id_from_imported_group_id(g[:id]) - groups_skipped += 1 + if @lookup.group_id_from_imported_group_id(g[:id]) + skipped += 1 else new_group = create_group(g, g[:id]) if new_group.valid? 
- @existing_groups[g[:id].to_s] = new_group.id - groups_created += 1 + @lookup.add_group(g[:id].to_s, new_group) + created += 1 else - @failed_groups << g + failed += 1 puts "Failed to create group id #{g[:id]} #{new_group.name}: #{new_group.errors.full_messages}" end end - print_status groups_created + groups_skipped + @failed_groups.length + (opts[:offset] || 0), total + print_status created + skipped + failed + (opts[:offset] || 0), total end - return [groups_created, groups_skipped] + [created, skipped] end def create_group(opts, import_id) @@ -231,8 +201,9 @@ class ImportScripts::Base # user in the original datasource. The given id will not be used to # create the Discourse user record. def create_users(results, opts={}) - users_created = 0 - users_skipped = 0 + created = 0 + skipped = 0 + failed = 0 total = opts[:total] || results.size results.each do |result| @@ -240,34 +211,34 @@ class ImportScripts::Base # block returns nil to skip a user if u.nil? - users_skipped += 1 + skipped += 1 else import_id = u[:id] - if user_id_from_imported_user_id(import_id) - users_skipped += 1 + if @lookup.user_id_from_imported_user_id(import_id) + skipped += 1 elsif u[:email].present? new_user = create_user(u, import_id) if new_user.valid? && new_user.user_profile.valid? 
- @existing_users[import_id.to_s] = new_user.id - users_created += 1 + @lookup.add_user(import_id.to_s, new_user) + created += 1 else - @failed_users << u + failed += 1 puts "Failed to create user id: #{import_id}, username: #{new_user.username}, email: #{new_user.email}" puts "user errors: #{new_user.errors.full_messages}" puts "user_profile errors: #{new_user.user_profiler.errors.full_messages}" end else - @failed_users << u + failed += 1 puts "Skipping user id #{import_id} because email is blank" end end - print_status users_created + users_skipped + @failed_users.length + (opts[:offset] || 0), total + print_status created + skipped + failed + (opts[:offset] || 0), total end - return [users_created, users_skipped] + [created, skipped] end def create_user(opts, import_id) @@ -334,29 +305,39 @@ class ImportScripts::Base # create the Discourse category record. # Optional attributes are position, description, and parent_category_id. def create_categories(results) + created = 0 + skipped = 0 + total = results.size + results.each do |c| params = yield(c) # block returns nil to skip - next if params.nil? || category_id_from_imported_category_id(params[:id]) + if params.nil? || @lookup.category_id_from_imported_category_id(params[:id]) + skipped += 1 + else + # Basic massaging on the category name + params[:name] = "Blank" if params[:name].blank? + params[:name].strip! + params[:name] = params[:name][0..49] - # Basic massaging on the category name - params[:name] = "Blank" if params[:name].blank? - params[:name].strip! - params[:name] = params[:name][0..49] + # make sure categories don't go more than 2 levels deep + if params[:parent_category_id] + top = Category.find_by_id(params[:parent_category_id]) + top = top.parent_category while top && !top.parent_category.nil? 
+ params[:parent_category_id] = top.id if top + end - puts "\t#{params[:name]}" + new_category = create_category(params, params[:id]) + @lookup.add_category(params[:id], new_category) - # make sure categories don't go more than 2 levels deep - if params[:parent_category_id] - top = Category.find_by_id(params[:parent_category_id]) - top = top.parent_category while top && !top.parent_category.nil? - params[:parent_category_id] = top.id if top + created += 1 end - new_category = create_category(params, params[:id]) - @categories_lookup[params[:id]] = new_category.id + print_status created + skipped, total end + + [created, skipped] end def create_category(opts, import_id) @@ -396,6 +377,7 @@ class ImportScripts::Base skipped = 0 created = 0 total = opts[:total] || results.size + start_time = get_start_time("posts-#{total}") # the post count should be unique enough to differentiate between posts and PMs results.each do |r| params = yield(r) @@ -406,18 +388,14 @@ class ImportScripts::Base else import_id = params.delete(:id).to_s - if post_id_from_imported_post_id(import_id) + if @lookup.post_id_from_imported_post_id(import_id) skipped += 1 # already imported this post else begin new_post = create_post(params, import_id) if new_post.is_a?(Post) - @existing_posts[import_id] = new_post.id - @topic_lookup[new_post.id] = { - post_number: new_post.post_number, - topic_id: new_post.topic_id, - url: new_post.url, - } + @lookup.add_post(import_id, new_post) + @lookup.add_topic(new_post) created_post(new_post) @@ -439,10 +417,10 @@ class ImportScripts::Base end end - print_status skipped + created + (opts[:offset] || 0), total + print_status(created + skipped + (opts[:offset] || 0), total, start_time) end - return [created, skipped] + [created, skipped] end def create_post(opts, import_id) @@ -463,19 +441,8 @@ class ImportScripts::Base post ? post : post_creator.errors.full_messages end - # Creates an upload. - # Expects path to be the full path and filename of the source file. 
def create_upload(user_id, path, source_filename) - tmp = Tempfile.new('discourse-upload') - src = File.open(path) - FileUtils.copy_stream(src, tmp) - src.close - tmp.rewind - - Upload.create_for(user_id, tmp, source_filename, tmp.size) - ensure - tmp.close rescue nil - tmp.unlink rescue nil + @uploader.create_upload(user_id, path, source_filename) end # Iterate through a list of bookmark records to be imported. @@ -484,8 +451,8 @@ class ImportScripts::Base # Required fields are :user_id and :post_id, where both ids are # the values in the original datasource. def create_bookmarks(results, opts={}) - bookmarks_created = 0 - bookmarks_skipped = 0 + created = 0 + skipped = 0 total = opts[:total] || results.size user = User.new @@ -495,23 +462,29 @@ class ImportScripts::Base params = yield(result) # only the IDs are needed, so this should be enough - user.id = user_id_from_imported_user_id(params[:user_id]) - post.id = post_id_from_imported_post_id(params[:post_id]) - - if user.id.nil? || post.id.nil? - bookmarks_skipped += 1 - puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}" + if params.nil? + skipped += 1 else - begin - PostAction.act(user, post, PostActionType.types[:bookmark]) - bookmarks_created += 1 - rescue PostAction::AlreadyActed - bookmarks_skipped += 1 - end + user.id = @lookup.user_id_from_imported_user_id(params[:user_id]) + post.id = @lookup.post_id_from_imported_post_id(params[:post_id]) - print_status bookmarks_created + bookmarks_skipped + (opts[:offset] || 0), total + if user.id.nil? || post.id.nil? 
+ skipped += 1 + puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}" + else + begin + PostAction.act(user, post, PostActionType.types[:bookmark]) + created += 1 + rescue PostAction::AlreadyActed + skipped += 1 + end + end end + + print_status created + skipped + (opts[:offset] || 0), total end + + [created, skipped] end def close_inactive_topics(opts={}) @@ -633,23 +606,26 @@ class ImportScripts::Base end def html_for_upload(upload, display_filename) - if FileHelper.is_image?(upload.url) - embedded_image_html(upload) - else - attachment_html(upload, display_filename) - end + @uploader.html_for_upload(upload, display_filename) end def embedded_image_html(upload) - %Q[
] + @uploader.embedded_image_html(upload) end def attachment_html(upload, display_filename) - "#{display_filename} (#{number_to_human_size(upload.filesize)})" + @uploader.attachment_html(upload, display_filename) end - def print_status(current, max) - print "\r%9d / %d (%5.1f%%) " % [current, max, ((current.to_f / max.to_f) * 100).round(1)] + def print_status(current, max, start_time = nil) + if start_time.present? + elapsed_seconds = Time.now - start_time + elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60] + else + elements_per_minute = '' + end + + print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute] end def print_spinner @@ -658,6 +634,10 @@ class ImportScripts::Base print "\b#{@spinner_chars[0]}" end + def get_start_time(key) + @start_times.fetch(key) {|k| @start_times[k] = Time.now} + end + def batches(batch_size) offset = 0 loop do diff --git a/script/import_scripts/base/lookup_container.rb b/script/import_scripts/base/lookup_container.rb new file mode 100644 index 00000000000..0d8070932ae --- /dev/null +++ b/script/import_scripts/base/lookup_container.rb @@ -0,0 +1,99 @@ +module ImportScripts + class LookupContainer + def initialize + puts 'loading existing groups...' + @groups = {} + GroupCustomField.where(name: 'import_id').pluck(:group_id, :value).each do |group_id, import_id| + @groups[import_id] = group_id + end + + puts 'loading existing users...' + @users = {} + UserCustomField.where(name: 'import_id').pluck(:user_id, :value).each do |user_id, import_id| + @users[import_id] = user_id + end + + puts 'loading existing categories...' + @categories = {} + CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value).each do |category_id, import_id| + @categories[import_id] = category_id + end + + puts 'loading existing posts...' 
+ @posts = {} + PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id| + @posts[import_id] = post_id + end + + puts 'loading existing topics...' + @topics = {} + Post.joins(:topic).pluck('posts.id, posts.topic_id, posts.post_number, topics.slug').each do |p| + @topics[p[0]] = { + topic_id: p[1], + post_number: p[2], + url: Post.url(p[3], p[1], p[2]) + } + end + end + + # Get the Discourse Post id based on the id of the source record + def post_id_from_imported_post_id(import_id) + @posts[import_id] || @posts[import_id.to_s] + end + + # Get the Discourse topic info (a hash) based on the id of the source record + def topic_lookup_from_imported_post_id(import_id) + post_id = post_id_from_imported_post_id(import_id) + post_id ? @topics[post_id] : nil + end + + # Get the Discourse Group id based on the id of the source group + def group_id_from_imported_group_id(import_id) + @groups[import_id] || @groups[import_id.to_s] || find_group_by_import_id(import_id).try(:id) + end + + # Get the Discourse Group based on the id of the source group + def find_group_by_import_id(import_id) + GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group) + end + + # Get the Discourse User id based on the id of the source user + def user_id_from_imported_user_id(import_id) + @users[import_id] || @users[import_id.to_s] || find_user_by_import_id(import_id).try(:id) + end + + # Get the Discourse User based on the id of the source user + def find_user_by_import_id(import_id) + UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user) + end + + # Get the Discourse Category id based on the id of the source category + def category_id_from_imported_category_id(import_id) + @categories[import_id] || @categories[import_id.to_s] + end + + def add_group(import_id, group) + @groups[import_id] = group.id + end + + def add_user(import_id, user) + @users[import_id] = user.id + end + + def add_category(import_id, 
category) + @categories[import_id] = category.id + end + + def add_post(import_id, post) + @posts[import_id] = post.id + end + + def add_topic(post) + @topics[post.id] = { + post_number: post.post_number, + topic_id: post.topic_id, + url: post.url, + } + end + end +end diff --git a/script/import_scripts/base/uploader.rb b/script/import_scripts/base/uploader.rb new file mode 100644 index 00000000000..62ddac451d7 --- /dev/null +++ b/script/import_scripts/base/uploader.rb @@ -0,0 +1,45 @@ +require_dependency 'url_helper' +require_dependency 'file_helper' + +module ImportScripts + class Uploader + include ActionView::Helpers::NumberHelper + + # Creates an upload. + # Expects path to be the full path and filename of the source file. + # @return [Upload] + def create_upload(user_id, path, source_filename) + tmp = Tempfile.new('discourse-upload') + src = File.open(path) + FileUtils.copy_stream(src, tmp) + src.close + tmp.rewind + + Upload.create_for(user_id, tmp, source_filename, tmp.size) + rescue => e + Rails.logger.error("Failed to create upload: #{e}") + nil + ensure + tmp.close rescue nil + tmp.unlink rescue nil + end + + def html_for_upload(upload, display_filename) + if FileHelper.is_image?(upload.url) + embedded_image_html(upload) + else + attachment_html(upload, display_filename) + end + end + + def embedded_image_html(upload) + image_width = [upload.width, SiteSetting.max_image_width].compact.min + image_height = [upload.height, SiteSetting.max_image_height].compact.min + %Q[
] + end + + def attachment_html(upload, display_filename) + "#{display_filename} (#{number_to_human_size(upload.filesize)})" + end + end +end diff --git a/script/import_scripts/phpbb3.rb b/script/import_scripts/phpbb3.rb index 373db6e755b..639b51d7852 100644 --- a/script/import_scripts/phpbb3.rb +++ b/script/import_scripts/phpbb3.rb @@ -1,486 +1,29 @@ -require "mysql2" -require File.expand_path(File.dirname(__FILE__) + "/base.rb") - -class ImportScripts::PhpBB3 < ImportScripts::Base - - PHPBB_DB = "phpbb" - BATCH_SIZE = 1000 - - ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s):// - NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https:// - - # Set PHPBB_BASE_DIR to the base directory of your phpBB installation. - # When importing, you should place the subdirectories "files" (containing all - # attachments) and "images" (containing avatars) in PHPBB_BASE_DIR. - # If nil, [attachment] tags and avatars won't be processed. - # Edit AUTHORIZED_EXTENSIONS as needed. - # If you used ATTACHMENTS_BASE_DIR before, e.g. ATTACHMENTS_BASE_DIR = '/var/www/phpbb/files/' - # would become PHPBB_BASE_DIR = '/var/www/phpbb' - # now. - PHPBB_BASE_DIR = '/var/www/phpbb' - AUTHORIZED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'zip', 'rar', 'pdf'] - - # Avatar types to import.: - # 1 = uploaded avatars (you should probably leave this here) - # 2 = hotlinked avatars - WARNING: this will considerably slow down your import - # if there are many hotlinked avatars and some of them unavailable! - # 3 = galery avatars (the predefined avatars phpBB offers. They will be converted to uploaded avatars) - IMPORT_AVATARS = [1, 3] - - def initialize - super - - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - #password: "password", - database: PHPBB_DB - ) - phpbb_read_config - end - - def execute - import_users - import_categories - import_posts - import_private_messages - import_attachments unless PHPBB_BASE_DIR.nil? 
- suspend_users - end - - def import_users - puts '', "creating users" - - total_count = mysql_query("SELECT count(*) count - FROM phpbb_users u - JOIN phpbb_groups g ON g.group_id = u.group_id - WHERE g.group_name != 'BOTS' - AND u.user_type != 1;").first['count'] - - batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT user_id id, user_email email, username, user_regdate, group_name, user_avatar_type, user_avatar - FROM phpbb_users u - JOIN phpbb_groups g ON g.group_id = u.group_id - WHERE g.group_name != 'BOTS' - AND u.user_type != 1 - ORDER BY u.user_id ASC - LIMIT #{BATCH_SIZE} - OFFSET #{offset};") - - break if results.size < 1 - - create_users(results, total: total_count, offset: offset) do |user| - { id: user['id'], - email: user['email'], - username: user['username'], - created_at: Time.zone.at(user['user_regdate']), - moderator: user['group_name'] == 'GLOBAL_MODERATORS', - admin: user['group_name'] == 'ADMINISTRATORS', - post_create_action: proc do |newmember| - if not PHPBB_BASE_DIR.nil? and IMPORT_AVATARS.include?(user['user_avatar_type']) and newmember.uploaded_avatar_id.blank? - path = phpbb_avatar_fullpath(user['user_avatar_type'], user['user_avatar']) - if path - begin - upload = create_upload(newmember.id, path, user['user_avatar']) - if upload.persisted? - newmember.import_mode = false - newmember.create_user_avatar - newmember.import_mode = true - newmember.user_avatar.update(custom_upload_id: upload.id) - newmember.update(uploaded_avatar_id: upload.id) - else - puts "Error: Upload did not persist!" 
- end - rescue SystemCallError => err - puts "Could not import avatar: #{err.message}" - end - end - end - end - } - end - end - end - - def import_categories - results = mysql_query(" - SELECT forum_id id, parent_id, left(forum_name, 50) name, forum_desc description - FROM phpbb_forums - ORDER BY parent_id ASC, forum_id ASC - ") - - create_categories(results) do |row| - h = {id: row['id'], name: CGI.unescapeHTML(row['name']), description: CGI.unescapeHTML(row['description'])} - if row['parent_id'].to_i > 0 - h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id']) - end - h - end - end - - def import_posts - puts "", "creating topics and posts" - - total_count = mysql_query("SELECT count(*) count from phpbb_posts").first["count"] - - batches(BATCH_SIZE) do |offset| - results = mysql_query(" - SELECT p.post_id id, - p.topic_id topic_id, - t.forum_id category_id, - t.topic_title title, - t.topic_first_post_id first_post_id, - p.poster_id user_id, - p.post_text raw, - p.post_time post_time - FROM phpbb_posts p, - phpbb_topics t - WHERE p.topic_id = t.topic_id - ORDER BY id - LIMIT #{BATCH_SIZE} - OFFSET #{offset}; - ") - - break if results.size < 1 - - create_posts(results, total: total_count, offset: offset) do |m| - skip = false - mapped = {} - - mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_phpbb_post(m['raw'], m['id']) - mapped[:created_at] = Time.zone.at(m['post_time']) - - if m['id'] == m['first_post_id'] - mapped[:category] = category_id_from_imported_category_id(m['category_id']) - mapped[:title] = CGI.unescapeHTML(m['title']) - else - parent = topic_lookup_from_imported_post_id(m['first_post_id']) - if parent - mapped[:topic_id] = parent[:topic_id] - else - puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" - skip = true - end - end - - skip ? 
nil : mapped - end - end - end - - def import_private_messages - puts "", "creating private messages" - - total_count = mysql_query("SELECT count(*) count from phpbb_privmsgs").first["count"] - - batches(BATCH_SIZE) do |offset| - results = mysql_query(" - SELECT msg_id id, - root_level, - author_id user_id, - message_time, - message_subject, - message_text - FROM phpbb_privmsgs - ORDER BY root_level ASC, msg_id ASC - LIMIT #{BATCH_SIZE} - OFFSET #{offset}; - ") - - break if results.size < 1 - - create_posts(results, total: total_count, offset: offset) do |m| - skip = false - mapped = {} - - mapped[:id] = "pm:#{m['id']}" - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_phpbb_post(m['message_text'], m['id']) - mapped[:created_at] = Time.zone.at(m['message_time']) - - if m['root_level'] == 0 - mapped[:title] = CGI.unescapeHTML(m['message_subject']) - mapped[:archetype] = Archetype.private_message - - # Find the users who are part of this private message. - # Found from the to_address of phpbb_privmsgs, by looking at - # all the rows with the same root_level. - # to_address looks like this: "u_91:u_1234:u_200" - # The "u_" prefix is discarded and the rest is a user_id. - - import_user_ids = mysql_query(" - SELECT to_address - FROM phpbb_privmsgs - WHERE msg_id = #{m['id']} - OR root_level = #{m['id']}").map { |r| r['to_address'].split(':') }.flatten!.map! { |u| u[2..-1] } - - mapped[:target_usernames] = import_user_ids.map! do |import_user_id| - import_user_id.to_s == m['user_id'].to_s ? nil : User.find_by_id(user_id_from_imported_user_id(import_user_id)).try(:username) - end.compact.uniq - - skip = true if mapped[:target_usernames].empty? # pm with yourself? - else - parent = topic_lookup_from_imported_post_id("pm:#{m['root_level']}") - if parent - mapped[:topic_id] = parent[:topic_id] - else - puts "Parent post pm:#{m['root_level']} doesn't exist. 
Skipping #{m["id"]}: #{m["message_subject"][0..40]}" - skip = true - end - end - - skip ? nil : mapped - end - end - end - - def suspend_users - puts '', "updating banned users" - - where = "ban_userid > 0 AND (ban_end = 0 OR ban_end > #{Time.zone.now.to_i})" - - banned = 0 - failed = 0 - total = mysql_query("SELECT count(*) count FROM phpbb_banlist WHERE #{where}").first['count'] - - system_user = Discourse.system_user - - mysql_query("SELECT ban_userid, ban_start, ban_end, ban_give_reason FROM phpbb_banlist WHERE #{where}").each do |b| - user = find_user_by_import_id(b['ban_userid']) - if user - user.suspended_at = Time.zone.at(b['ban_start']) - user.suspended_till = b['ban_end'] > 0 ? Time.zone.at(b['ban_end']) : 200.years.from_now - - if user.save - StaffActionLogger.new(system_user).log_user_suspend(user, b['ban_give_reason']) - banned += 1 - else - puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}" - failed += 1 - end - else - puts "Not found: #{b['ban_userid']}" - failed += 1 - end - - print_status banned + failed, total - end - end - - def process_phpbb_post(raw, import_id) - s = raw.dup - - # :) is encoded as :) - s.gsub!(/]+) \/>/, '\1') - - # Internal forum links of this form: viewtopic.php?f=26&t=3412 - s.gsub!(/viewtopic(?:.*)t=(\d+)<\/a>/) do |phpbb_link| - replace_internal_link(phpbb_link, $1, import_id) - end - - # Some links look like this: http://www.onegameamonth.com - s.gsub!(/(.+)<\/a>/, '[\2](\1)') - - # Many phpbb bbcode tags have a hash attached to them. Examples: - # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] - # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] - s.gsub!(/:(?:\w{8})\]/, ']') - - s = CGI.unescapeHTML(s) - - # phpBB shortens link text like this, which breaks our markdown processing: - # [http://answers.yahoo.com/question/index ... 
223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) - # - # Work around it for now: - s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[') - - # Replace internal forum links that aren't in the format - s.gsub!(internal_url_regexp) do |phpbb_link| - replace_internal_link(phpbb_link, $1, import_id) - end - # convert list tags to ul and list=1 tags to ol - # (basically, we're only missing list=a here...) - s.gsub!(/\[list\](.*?)\[\/list:u\]/m, '[ul]\1[/ul]') - s.gsub!(/\[list=1\](.*?)\[\/list:o\]/m, '[ol]\1[/ol]') - # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - s.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]') - - s - end - - def replace_internal_link(phpbb_link, import_topic_id, from_import_post_id) - results = mysql_query("select topic_first_post_id from phpbb_topics where topic_id = #{import_topic_id}") - - return phpbb_link unless results.size > 0 - - linked_topic_id = results.first['topic_first_post_id'] - lookup = topic_lookup_from_imported_post_id(linked_topic_id) - - return phpbb_link unless lookup - - t = Topic.find_by_id(lookup[:topic_id]) - if t - "#{NEW_SITE_PREFIX}/t/#{t.slug}/#{t.id}" - else - phpbb_link - end - end - - def internal_url_regexp - @internal_url_regexp ||= Regexp.new("http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}/viewtopic\\.php?(?:\\S*)t=(\\d+)") - end - - # This step is done separately because it can take multiple attempts to get right (because of - # missing files, wrong paths, authorized extensions, etc.). 
- def import_attachments - setting = AUTHORIZED_EXTENSIONS.join('|') - SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions - - r = /\[attachment=[\d]+\]<\!-- [\w]+ --\>([^<]+)<\!-- [\w]+ --\>\[\/attachment\]/ - - user = Discourse.system_user - - current_count = 0 - total_count = Post.count - success_count = 0 - fail_count = 0 - - puts '', "Importing attachments...", '' - - Post.find_each do |post| - current_count += 1 - print_status current_count, total_count - - new_raw = post.raw.dup - new_raw.gsub!(r) do |s| - matches = r.match(s) - real_filename = matches[1] - - # note: currently, we do not import PM attachments. - # If this should be desired, this has to be fixed, - # otherwise, the SQL state coughs up an error for the - # clause "WHERE post_msg_id = pm12345"... - next s if post.custom_fields['import_id'].start_with?('pm:') - - sql = "SELECT physical_filename, - mimetype - FROM phpbb_attachments - WHERE post_msg_id = #{post.custom_fields['import_id']} - AND real_filename = '#{real_filename}';" - - begin - results = mysql_query(sql) - rescue Mysql2::Error => e - puts "SQL Error" - puts e.message - puts sql - fail_count += 1 - next s - end - - row = results.first - if !row - puts "Couldn't find phpbb_attachments record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}, real_filename = #{real_filename}" - fail_count += 1 - next s - end - - filename = File.join(PHPBB_BASE_DIR+'/files', row['physical_filename']) - if !File.exists?(filename) - puts "Attachment file doesn't exist: #{filename}" - fail_count += 1 - next s - end - - upload = create_upload(user.id, filename, real_filename) - - if upload.nil? || !upload.valid? 
- puts "Upload not valid :(" - puts upload.errors.inspect if upload - fail_count += 1 - next s - end - - success_count += 1 - - html_for_upload(upload, real_filename) - end - - if new_raw != post.raw - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: 'Migrate from PHPBB3' }) - end - end - - puts '', '' - puts "succeeded: #{success_count}" - puts " failed: #{fail_count}" if fail_count > 0 - puts '' - end - - # Read avatar config from phpBB configuration table. - # Stored there: - paths relative to the phpBB install path - # - "salt", i.e. base filename for uploaded avatars - # - def phpbb_read_config - results = mysql_query("SELECT config_name, config_value - FROM phpbb_config;") - if results.size<1 - puts "could not read config... no avatars and attachments will be imported!" - return - end - results.each do |result| - if result['config_name']=='avatar_gallery_path' - @avatar_gallery_path = result['config_value'] - elsif result['config_name']=='avatar_path' - @avatar_path = result['config_value'] - elsif result['config_name']=='avatar_salt' - @avatar_salt = result['config_value'] - end - end - end - - # Create the full path to the phpBB avatar specified by avatar_type and filename. - # - def phpbb_avatar_fullpath(avatar_type, filename) - case avatar_type - when 1 # uploaded avatar - filename.gsub!(/_[0-9]+\./,'.') # we need 1337.jpg, not 1337_2983745.jpg - path=@avatar_path - PHPBB_BASE_DIR+'/'+path+'/'+@avatar_salt+'_'+filename - when 3 # gallery avatar - path=@avatar_gallery_path - PHPBB_BASE_DIR+'/'+path+'/'+filename - when 2 # hotlinked avatar - begin - hotlinked = FileHelper.download(filename, SiteSetting.max_image_size_kb.kilobytes, "discourse-hotlinked") - rescue StandardError => err - puts "Error downloading avatar: #{err.message}. Skipping..." 
-        return nil
-      end
-      if hotlinked
-        if hotlinked.size <= SiteSetting.max_image_size_kb.kilobytes
-          return hotlinked
-        else
-          Rails.logger.error("Failed to pull hotlinked image: #{filename} - Image is bigger than #{@max_size}")
-          nil
-        end
-      else
-        Rails.logger.error("There was an error while downloading '#{filename}' locally.")
-        nil
-      end
-    else
-      puts 'Invalid avatar type #{avatar_type}, skipping'
-      nil
-    end
-  end
-
-
-  def mysql_query(sql)
-    @client.query(sql, cache_rows: false)
-  end
+if ARGV.length != 1 || !File.exist?(ARGV[0])
+  STDERR.puts '', 'Usage of phpBB3 importer:', 'bundle exec ruby phpbb3.rb <path/to/settings.yml>'
+  STDERR.puts '', "Use the settings file from #{File.expand_path('phpbb3/settings.yml', File.dirname(__FILE__))} as an example."
+  exit 1
 end
-ImportScripts::PhpBB3.new.perform
+module ImportScripts
+  module PhpBB3
+    require_relative 'phpbb3/support/settings'
+    require_relative 'phpbb3/database/database'
+
+    @settings = Settings.load(ARGV[0])
+
+    # We need to load the gem files for ruby-bbcode-to-md and the database adapter
+    # (e.g. mysql2) before bundler gets initialized by the base importer.
+    # Otherwise we get an error since those gems are not always in the Gemfile.
+ require 'ruby-bbcode-to-md' if @settings.use_bbcode_to_md + + begin + @database = Database.create(@settings.database) + rescue UnsupportedVersionError => error + STDERR.puts '', error.message + exit 1 + end + + require_relative 'phpbb3/importer' + Importer.new(@settings, @database).perform + end +end diff --git a/script/import_scripts/phpbb3/database/database.rb b/script/import_scripts/phpbb3/database/database.rb new file mode 100644 index 00000000000..731f05b8a12 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database.rb @@ -0,0 +1,56 @@ +require 'mysql2' + +module ImportScripts::PhpBB3 + class Database + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def self.create(database_settings) + Database.new(database_settings).create_database + end + + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def initialize(database_settings) + @database_settings = database_settings + @database_client = create_database_client + end + + # @return [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + def create_database + version = get_phpbb_version + + if version.start_with?('3.0') + require_relative 'database_3_0' + Database_3_0.new(@database_client, @database_settings) + elsif version.start_with?('3.1') + require_relative 'database_3_1' + Database_3_1.new(@database_client, @database_settings) + else + raise UnsupportedVersionError, "Unsupported version (#{version}) of phpBB detected.\n" \ + << 'Currently only 3.0.x and 3.1.x are supported by this importer.' 
+ end + end + + protected + + def create_database_client + Mysql2::Client.new( + host: @database_settings.host, + username: @database_settings.username, + password: @database_settings.password, + database: @database_settings.schema + ) + end + + def get_phpbb_version + table_prefix = @database_settings.table_prefix + + @database_client.query(<<-SQL, cache_rows: false, symbolize_keys: true).first[:config_value] + SELECT config_value + FROM #{table_prefix}_config + WHERE config_name = 'version' + SQL + end + end + + class UnsupportedVersionError < RuntimeError; + end +end diff --git a/script/import_scripts/phpbb3/database/database_3_0.rb b/script/import_scripts/phpbb3/database/database_3_0.rb new file mode 100644 index 00000000000..d4115cc38e1 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database_3_0.rb @@ -0,0 +1,333 @@ +require_relative 'database_base' +require_relative '../support/constants' + +module ImportScripts::PhpBB3 + class Database_3_0 < DatabaseBase + def count_users + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_users u + JOIN #{@table_prefix}_groups g ON g.group_id = u.group_id + WHERE u.user_type != #{Constants::USER_TYPE_IGNORE} + SQL + end + + def fetch_users(offset) + query(<<-SQL) + SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip, + u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason, + u.user_posts, u.user_website, u.user_from, u.user_birthday, u.user_avatar_type, u.user_avatar + FROM #{@table_prefix}_users u + JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id) + LEFT OUTER JOIN #{@table_prefix}_banlist b ON ( + u.user_id = b.ban_userid AND b.ban_exclude = 0 AND + (b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP()) + ) + WHERE u.user_type != #{Constants::USER_TYPE_IGNORE} + ORDER BY u.user_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def count_anonymous_users + count(<<-SQL) + SELECT COUNT(DISTINCT post_username) 
AS count
+        FROM #{@table_prefix}_posts
+        WHERE post_username <> ''
+      SQL
+    end
+
+    def fetch_anonymous_users(offset)
+      query(<<-SQL)
+        SELECT post_username, MIN(post_time) AS first_post_time
+        FROM #{@table_prefix}_posts
+        WHERE post_username <> ''
+        GROUP BY post_username
+        ORDER BY post_username ASC
+        LIMIT #{@batch_size}
+        OFFSET #{offset}
+      SQL
+    end
+
+    def fetch_categories # table names use the configured prefix, like every other query in this class
+      query(<<-SQL)
+        SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_desc, x.first_post_time
+        FROM #{@table_prefix}_forums f
+          LEFT OUTER JOIN (
+            SELECT MIN(topic_time) AS first_post_time, forum_id
+            FROM #{@table_prefix}_topics
+            GROUP BY forum_id
+          ) x ON (f.forum_id = x.forum_id)
+        WHERE f.forum_type != #{Constants::FORUM_TYPE_LINK}
+        ORDER BY f.parent_id ASC, f.left_id ASC
+      SQL
+    end
+
+    def count_posts
+      count(<<-SQL)
+        SELECT COUNT(*) AS count
+        FROM #{@table_prefix}_posts
+      SQL
+    end
+
+    def fetch_posts(offset)
+      query(<<-SQL)
+        SELECT p.post_id, p.topic_id, t.forum_id, t.topic_title, t.topic_first_post_id, p.poster_id,
+          p.post_text, p.post_time, p.post_username, t.topic_status, t.topic_type, t.poll_title,
+          CASE WHEN t.poll_length > 0 THEN t.poll_start + t.poll_length ELSE NULL END AS poll_end,
+          t.poll_max_options, p.post_attachment
+        FROM #{@table_prefix}_posts p
+          JOIN #{@table_prefix}_topics t ON (p.topic_id = t.topic_id)
+        ORDER BY p.post_id ASC
+        LIMIT #{@batch_size}
+        OFFSET #{offset}
+      SQL
+    end
+
+    def get_first_post_id(topic_id)
+      query(<<-SQL).first[:topic_first_post_id]
+        SELECT topic_first_post_id
+        FROM #{@table_prefix}_topics
+        WHERE topic_id = #{topic_id}
+      SQL
+    end
+
+    def fetch_poll_options(topic_id)
+      query(<<-SQL)
+        SELECT poll_option_id, poll_option_text, poll_option_total
+        FROM #{@table_prefix}_poll_options
+        WHERE topic_id = #{topic_id}
+        ORDER BY poll_option_id
+      SQL
+    end
+
+    def fetch_poll_votes(topic_id)
+      # this query ignores votes from users that do not exist anymore
+      query(<<-SQL)
+        SELECT u.user_id, v.poll_option_id
+        FROM #{@table_prefix}_poll_votes
v + JOIN #{@table_prefix}_users u ON (v.vote_user_id = u.user_id) + WHERE v.topic_id = #{topic_id} + SQL + end + + def count_voters(topic_id) + # anonymous voters can't be counted, but lets try to make the count look "correct" anyway + count(<<-SQL) + SELECT MAX(count) AS count + FROM ( + SELECT COUNT(DISTINCT vote_user_id) AS count + FROM #{@table_prefix}_poll_votes + WHERE topic_id = #{topic_id} + UNION + SELECT MAX(poll_option_total) AS count + FROM #{@table_prefix}_poll_options + WHERE topic_id = #{topic_id} + ) x + SQL + end + + def get_max_attachment_size + query(<<-SQL).first[:filesize] + SELECT IFNULL(MAX(filesize), 0) AS filesize + FROM #{@table_prefix}_attachments + SQL + end + + def fetch_attachments(topic_id, post_id) + query(<<-SQL) + SELECT physical_filename, real_filename + FROM #{@table_prefix}_attachments + WHERE topic_id = #{topic_id} AND post_msg_id = #{post_id} + ORDER BY filetime DESC, post_msg_id ASC + SQL + end + + def count_messages(use_fixed_messages) + if use_fixed_messages + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_import_privmsgs + SQL + else + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_privmsgs + SQL + end + end + + def fetch_messages(use_fixed_messages, offset) + if use_fixed_messages + query(<<-SQL) + SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text, + IFNULL(a.attachment_count, 0) AS attachment_count + FROM #{@table_prefix}_privmsgs m + JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) + LEFT OUTER JOIN ( + SELECT post_msg_id, COUNT(*) AS attachment_count + FROM #{@table_prefix}_attachments + WHERE topic_id = 0 + GROUP BY post_msg_id + ) a ON (m.msg_id = a.post_msg_id) + ORDER BY i.root_msg_id ASC, m.msg_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + else + query(<<-SQL) + SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject, + m.message_text, IFNULL(a.attachment_count, 0) AS 
attachment_count + FROM #{@table_prefix}_privmsgs m + LEFT OUTER JOIN ( + SELECT post_msg_id, COUNT(*) AS attachment_count + FROM #{@table_prefix}_attachments + WHERE topic_id = 0 + GROUP BY post_msg_id + ) a ON (m.msg_id = a.post_msg_id) + ORDER BY m.root_level ASC, m.msg_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + end + + def fetch_message_participants(msg_id, use_fixed_messages) + if use_fixed_messages + query(<<-SQL) + SELECT m.to_address + FROM #{@table_prefix}_privmsgs m + JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) + WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id} + SQL + else + query(<<-SQL) + SELECT m.to_address + FROM #{@table_prefix}_privmsgs m + WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id} + SQL + end + end + + def calculate_fixed_messages + drop_temp_import_message_table + create_temp_import_message_table + fill_temp_import_message_table + + drop_import_message_table + create_import_message_table + fill_import_message_table + + drop_temp_import_message_table + end + + def count_bookmarks + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_bookmarks + SQL + end + + def fetch_bookmarks(offset) + query(<<-SQL) + SELECT b.user_id, t.topic_first_post_id + FROM #{@table_prefix}_bookmarks b + JOIN #{@table_prefix}_topics t ON (b.topic_id = t.topic_id) + ORDER BY b.user_id ASC, b.topic_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def get_config_values + query(<<-SQL).first + SELECT + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'version') AS phpbb_version, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_gallery_path') AS avatar_gallery_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_path') AS avatar_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_salt') AS avatar_salt, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name 
= 'smilies_path') AS smilies_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path + SQL + end + + protected + + def drop_temp_import_message_table + query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs_temp") + end + + def create_temp_import_message_table + query(<<-SQL) + CREATE TABLE #{@table_prefix}_import_privmsgs_temp ( + msg_id MEDIUMINT(8) NOT NULL, + root_msg_id MEDIUMINT(8) NOT NULL, + recipient_id MEDIUMINT(8), + normalized_subject VARCHAR(255) NOT NULL, + PRIMARY KEY (msg_id) + ) + SQL + end + + # this removes duplicate messages, converts the to_address to a number + # and stores the message_subject in lowercase and without the prefix "Re: " + def fill_temp_import_message_table + query(<<-SQL) + INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject) + SELECT m.msg_id, m.root_level, + CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN + CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER) + ELSE NULL END AS recipient_id, + LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN + SUBSTRING(m.message_subject, 5) + ELSE m.message_subject END) AS normalized_subject + FROM #{@table_prefix}_privmsgs m + WHERE NOT EXISTS ( + SELECT 1 + FROM #{@table_prefix}_privmsgs x + WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id + AND x.to_address = m.to_address AND x.message_time = m.message_time + ) + SQL + end + + def drop_import_message_table + query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs") + end + + def create_import_message_table + query(<<-SQL) + CREATE TABLE #{@table_prefix}_import_privmsgs ( + msg_id MEDIUMINT(8) NOT NULL, + root_msg_id MEDIUMINT(8) NOT NULL, + PRIMARY KEY (msg_id), + INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id) + ) + SQL + end + + # this tries to calculate the actual root_level (= msg_id of the first message in a + # private conversation) based on 
subject, time, author and recipient
+    def fill_import_message_table
+      query(<<-SQL)
+        INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id)
+        SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN
+          COALESCE((
+            SELECT a.msg_id
+            FROM #{@table_prefix}_privmsgs a
+              JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id)
+            WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR
+                   (a.author_id = i.recipient_id AND b.recipient_id = m.author_id))
+              AND b.normalized_subject = i.normalized_subject
+              AND a.msg_id <> m.msg_id
+              AND a.message_time < m.message_time
+            ORDER BY a.message_time ASC
+            LIMIT 1
+          ), 0) ELSE i.root_msg_id END AS root_msg_id
+        FROM #{@table_prefix}_privmsgs m
+          JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id)
+      SQL
+    end
+  end
+end
diff --git a/script/import_scripts/phpbb3/database/database_3_1.rb b/script/import_scripts/phpbb3/database/database_3_1.rb
new file mode 100644
index 00000000000..bf13546e2d0
--- /dev/null
+++ b/script/import_scripts/phpbb3/database/database_3_1.rb
@@ -0,0 +1,26 @@
+require_relative 'database_3_0'
+require_relative '../support/constants' # same path database_3_0.rb uses from this directory
+
+module ImportScripts::PhpBB3
+  class Database_3_1 < Database_3_0
+    def fetch_users(offset)
+      query(<<-SQL)
+        SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip,
+          u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason,
+          u.user_posts, f.pf_phpbb_website AS user_website, f.pf_phpbb_location AS user_from,
+          u.user_birthday, u.user_avatar_type, u.user_avatar
+        FROM #{@table_prefix}_users u
+          JOIN #{@table_prefix}_profile_fields_data f ON (u.user_id = f.user_id)
+          JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id)
+          LEFT OUTER JOIN #{@table_prefix}_banlist b ON (
+            u.user_id = b.ban_userid AND b.ban_exclude = 0 AND
+            (b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP())
+          )
+        WHERE u.user_type != #{Constants::USER_TYPE_IGNORE}
+        ORDER BY u.user_id
ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + end +end diff --git a/script/import_scripts/phpbb3/database/database_base.rb b/script/import_scripts/phpbb3/database/database_base.rb new file mode 100644 index 00000000000..3c8b4b37181 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database_base.rb @@ -0,0 +1,24 @@ +module ImportScripts::PhpBB3 + class DatabaseBase + # @param database_client [Mysql2::Client] + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def initialize(database_client, database_settings) + @database_client = database_client + + @batch_size = database_settings.batch_size + @table_prefix = database_settings.table_prefix + end + + protected + + # Executes a database query. + def query(sql) + @database_client.query(sql, cache_rows: false, symbolize_keys: true) + end + + # Executes a database query and returns the value of the 'count' column. + def count(sql) + query(sql).first[:count] + end + end +end diff --git a/script/import_scripts/phpbb3/importer.rb b/script/import_scripts/phpbb3/importer.rb new file mode 100644 index 00000000000..28c2ed6fa63 --- /dev/null +++ b/script/import_scripts/phpbb3/importer.rb @@ -0,0 +1,152 @@ +require_relative '../base' +require_relative 'support/settings' +require_relative 'database/database' +require_relative 'importers/importer_factory' + +module ImportScripts::PhpBB3 + class Importer < ImportScripts::Base + # @param settings [ImportScripts::PhpBB3::Settings] + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + def initialize(settings, database) + @settings = settings + super() + + @database = database + @php_config = database.get_config_values + @importers = ImporterFactory.new(@database, @lookup, @uploader, @settings, @php_config) + end + + def perform + super if settings_check_successful? 
+ end + + protected + + def execute + puts '', "importing from phpBB #{@php_config[:phpbb_version]}" + + import_users + import_anonymous_users if @settings.import_anonymous_users + import_categories + import_posts + import_private_messages if @settings.import_private_messages + import_bookmarks if @settings.import_bookmarks + end + + def get_site_settings_for_import + settings = super + + max_file_size_kb = @database.get_max_attachment_size + settings[:max_image_size_kb] = [max_file_size_kb, SiteSetting.max_image_size_kb].max + settings[:max_attachment_size_kb] = [max_file_size_kb, SiteSetting.max_attachment_size_kb].max + + settings + end + + def settings_check_successful? + true + end + + def import_users + puts '', 'creating users' + total_count = @database.count_users + importer = @importers.user_importer + + batches do |offset| + rows = @database.fetch_users(offset) + break if rows.size < 1 + + create_users(rows, total: total_count, offset: offset) do |row| + importer.map_user(row) + end + end + end + + def import_anonymous_users + puts '', 'creating anonymous users' + total_count = @database.count_anonymous_users + importer = @importers.user_importer + + batches do |offset| + rows = @database.fetch_anonymous_users(offset) + break if rows.size < 1 + + create_users(rows, total: total_count, offset: offset) do |row| + importer.map_anonymous_user(row) + end + end + end + + def import_categories + puts '', 'creating categories' + rows = @database.fetch_categories + importer = @importers.category_importer + + create_categories(rows) do |row| + importer.map_category(row) + end + end + + def import_posts + puts '', 'creating topics and posts' + total_count = @database.count_posts + importer = @importers.post_importer + + batches do |offset| + rows = @database.fetch_posts(offset) + break if rows.size < 1 + + create_posts(rows, total: total_count, offset: offset) do |row| + importer.map_post(row) + end + end + end + + def import_private_messages + if 
@settings.fix_private_messages + puts '', 'fixing private messages' + @database.calculate_fixed_messages + end + + puts '', 'creating private messages' + total_count = @database.count_messages(@settings.fix_private_messages) + importer = @importers.message_importer + + batches do |offset| + rows = @database.fetch_messages(@settings.fix_private_messages, offset) + break if rows.size < 1 + + create_posts(rows, total: total_count, offset: offset) do |row| + importer.map_message(row) + end + end + end + + def import_bookmarks + puts '', 'creating bookmarks' + total_count = @database.count_bookmarks + importer = @importers.bookmark_importer + + batches do |offset| + rows = @database.fetch_bookmarks(offset) + break if rows.size < 1 + + create_bookmarks(rows, total: total_count, offset: offset) do |row| + importer.map_bookmark(row) + end + end + end + + def update_last_seen_at + # no need for this since the importer sets last_seen_at for each user during the import + end + + def use_bbcode_to_md? 
+ @settings.use_bbcode_to_md + end + + def batches + super(@settings.database.batch_size) + end + end +end diff --git a/script/import_scripts/phpbb3/importers/attachment_importer.rb b/script/import_scripts/phpbb3/importers/attachment_importer.rb new file mode 100644 index 00000000000..e41ca7a1209 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/attachment_importer.rb @@ -0,0 +1,36 @@ +module ImportScripts::PhpBB3 + class AttachmentImporter + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(database, uploader, settings, phpbb_config) + @database = database + @uploader = uploader + + @attachment_path = File.join(settings.base_dir, phpbb_config[:attachment_path]) + end + + def import_attachments(user_id, post_id, topic_id = 0) + rows = @database.fetch_attachments(topic_id, post_id) + return nil if rows.size < 1 + + attachments = [] + + rows.each do |row| + path = File.join(@attachment_path, row[:physical_filename]) + filename = CGI.unescapeHTML(row[:real_filename]) + upload = @uploader.create_upload(user_id, path, filename) + + if upload.nil? || !upload.valid? 
+ puts "Failed to upload #{path}" + puts upload.errors.inspect if upload + else + attachments << @uploader.html_for_upload(upload, filename) + end + end + + attachments + end + end +end diff --git a/script/import_scripts/phpbb3/importers/avatar_importer.rb b/script/import_scripts/phpbb3/importers/avatar_importer.rb new file mode 100644 index 00000000000..3db8b701004 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/avatar_importer.rb @@ -0,0 +1,107 @@ +module ImportScripts::PhpBB3 + class AvatarImporter + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(uploader, settings, phpbb_config) + @uploader = uploader + @settings = settings + + @uploaded_avatar_path = File.join(settings.base_dir, phpbb_config[:avatar_path]) + @gallery_path = File.join(settings.base_dir, phpbb_config[:avatar_gallery_path]) + @avatar_salt = phpbb_config[:avatar_salt] + end + + def import_avatar(user, row) + avatar_type = row[:user_avatar_type] + return unless is_avatar_importable?(user, avatar_type) + + filename = row[:user_avatar] + path = get_avatar_path(avatar_type, filename) + return if path.nil? + + begin + filename = "avatar#{File.extname(path)}" + upload = @uploader.create_upload(user.id, path, filename) + + if upload.persisted? + user.import_mode = false + user.create_user_avatar + user.import_mode = true + user.user_avatar.update(custom_upload_id: upload.id) + user.update(uploaded_avatar_id: upload.id) + else + Rails.logger.error("Could not persist avatar for user #{user.username}") + end + rescue SystemCallError => err + Rails.logger.error("Could not import avatar for user #{user.username}: #{err.message}") + end + end + + protected + + def is_avatar_importable?(user, avatar_type) + is_allowed_avatar_type?(avatar_type) && user.uploaded_avatar_id.blank? 
+ end + + def get_avatar_path(avatar_type, filename) + case avatar_type + when Constants::AVATAR_TYPE_UPLOADED then + filename.gsub!(/_[0-9]+\./, '.') # we need 1337.jpg, not 1337_2983745.jpg + get_uploaded_path(filename) + when Constants::AVATAR_TYPE_GALLERY then + get_gallery_path(filename) + when Constants::AVATAR_TYPE_REMOTE then + download_avatar(filename) + else + Rails.logger.error("Invalid avatar type #{avatar_type}. Skipping...") + nil + end + end + + # Tries to download the remote avatar. + def download_avatar(url) + max_image_size_kb = SiteSetting.max_image_size_kb.kilobytes + + begin + avatar_file = FileHelper.download(url, max_image_size_kb, 'discourse-avatar') + rescue StandardError => err + warn "Error downloading avatar: #{err.message}. Skipping..." + return nil + end + + if avatar_file + if avatar_file.size <= max_image_size_kb + return avatar_file + else + Rails.logger.error("Failed to download remote avatar: #{url} - Image is larger than #{max_image_size_kb} KB") + return nil + end + end + + Rails.logger.error("There was an error while downloading '#{url}' locally.") + nil + end + + def get_uploaded_path(filename) + File.join(@uploaded_avatar_path, "#{@avatar_salt}_#{filename}") + end + + def get_gallery_path(filename) + File.join(@gallery_path, filename) + end + + def is_allowed_avatar_type?(avatar_type) + case avatar_type + when Constants::AVATAR_TYPE_UPLOADED then + @settings.import_uploaded_avatars + when Constants::AVATAR_TYPE_REMOTE then + @settings.import_remote_avatars + when Constants::AVATAR_TYPE_GALLERY then + @settings.import_gallery_avatars + else + false + end + end + end +end diff --git a/script/import_scripts/phpbb3/importers/bookmark_importer.rb b/script/import_scripts/phpbb3/importers/bookmark_importer.rb new file mode 100644 index 00000000000..febc8ab8697 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/bookmark_importer.rb @@ -0,0 +1,10 @@ +module ImportScripts::PhpBB3 + class BookmarkImporter + def map_bookmark(row) 
+ { + user_id: row[:user_id], + post_id: row[:topic_first_post_id] + } + end + end +end diff --git a/script/import_scripts/phpbb3/importers/category_importer.rb b/script/import_scripts/phpbb3/importers/category_importer.rb new file mode 100644 index 00000000000..65eeb4097e6 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/category_importer.rb @@ -0,0 +1,47 @@ +module ImportScripts::PhpBB3 + class CategoryImporter + # @param lookup [ImportScripts::LookupContainer] + # @param text_processor [ImportScripts::PhpBB3::TextProcessor] + def initialize(lookup, text_processor) + @lookup = lookup + @text_processor = text_processor + end + + def map_category(row) + { + id: row[:forum_id], + name: CGI.unescapeHTML(row[:forum_name]), + parent_category_id: @lookup.category_id_from_imported_category_id(row[:parent_id]), + post_create_action: proc do |category| + update_category_description(category, row) + end + } + end + + protected + + # @param category [Category] + def update_category_description(category, row) + return if row[:forum_desc].blank? && row[:first_post_time].blank? + + topic = category.topic + post = topic.first_post + + if row[:first_post_time].present? + created_at = Time.zone.at(row[:first_post_time]) + + topic.created_at = created_at + topic.save + + post.created_at = created_at + post.save + end + + if row[:forum_desc].present? 
+ changes = {raw: @text_processor.process_raw_text(row[:forum_desc])} + opts = {revised_at: post.created_at, bypass_bump: true} + post.revise(Discourse.system_user, changes, opts) + end + end + end +end diff --git a/script/import_scripts/phpbb3/importers/importer_factory.rb b/script/import_scripts/phpbb3/importers/importer_factory.rb new file mode 100644 index 00000000000..4b793a153ae --- /dev/null +++ b/script/import_scripts/phpbb3/importers/importer_factory.rb @@ -0,0 +1,69 @@ +require_relative 'attachment_importer' +require_relative 'avatar_importer' +require_relative 'bookmark_importer' +require_relative 'category_importer' +require_relative 'message_importer' +require_relative 'poll_importer' +require_relative 'post_importer' +require_relative 'user_importer' +require_relative '../support/smiley_processor' +require_relative '../support/text_processor' + +module ImportScripts::PhpBB3 + class ImporterFactory + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + # @param lookup [ImportScripts::LookupContainer] + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(database, lookup, uploader, settings, phpbb_config) + @database = database + @lookup = lookup + @uploader = uploader + @settings = settings + @phpbb_config = phpbb_config + end + + def user_importer + UserImporter.new(avatar_importer, @settings) + end + + def category_importer + CategoryImporter.new(@lookup, text_processor) + end + + def post_importer + PostImporter.new(@lookup, text_processor, attachment_importer, poll_importer, @settings) + end + + def message_importer + MessageImporter.new(@database, @lookup, text_processor, attachment_importer, @settings) + end + + def bookmark_importer + BookmarkImporter.new + end + + protected + + def attachment_importer + AttachmentImporter.new(@database, @uploader, @settings, @phpbb_config) + end + + def avatar_importer + 
AvatarImporter.new(@uploader, @settings, @phpbb_config) + end + + def poll_importer + PollImporter.new(@lookup, @database, text_processor) + end + + def text_processor + @text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings) + end + + def smiley_processor + SmileyProcessor.new(@uploader, @settings, @phpbb_config) + end + end +end diff --git a/script/import_scripts/phpbb3/importers/message_importer.rb b/script/import_scripts/phpbb3/importers/message_importer.rb new file mode 100644 index 00000000000..6200b0b0230 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/message_importer.rb @@ -0,0 +1,83 @@ +module ImportScripts::PhpBB3 + class MessageImporter + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + # @param lookup [ImportScripts::LookupContainer] + # @param text_processor [ImportScripts::PhpBB3::TextProcessor] + # @param attachment_importer [ImportScripts::PhpBB3::AttachmentImporter] + # @param settings [ImportScripts::PhpBB3::Settings] + def initialize(database, lookup, text_processor, attachment_importer, settings) + @database = database + @lookup = lookup + @text_processor = text_processor + @attachment_importer = attachment_importer + @settings = settings + end + + def map_message(row) + user_id = @lookup.user_id_from_imported_user_id(row[:author_id]) || Discourse.system_user.id + attachments = import_attachments(row, user_id) + + mapped = { + id: "pm:#{row[:msg_id]}", + user_id: user_id, + created_at: Time.zone.at(row[:message_time]), + raw: @text_processor.process_private_msg(row[:message_text], attachments) + } + + if row[:root_msg_id] == 0 + map_first_message(row, mapped) + else + map_other_message(row, mapped) + end + end + + protected + + def import_attachments(row, user_id) + if @settings.import_attachments && row[:attachment_count] > 0 + @attachment_importer.import_attachments(user_id, row[:msg_id]) + end + end + + def map_first_message(row, mapped) + mapped[:title] 
= CGI.unescapeHTML(row[:message_subject])
+      mapped[:archetype] = Archetype.private_message
+      mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id])
+
+      if mapped[:target_usernames].empty? # pm with yourself?
+        puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
+        return nil
+      end
+
+      mapped
+    end
+
+    def map_other_message(row, mapped)
+      parent_msg_id = "pm:#{row[:root_msg_id]}"
+      parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)
+
+      if parent.blank?
+        puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
+        return nil
+      end
+
+      mapped[:topic_id] = parent[:topic_id]
+      mapped
+    end
+
+    def get_usernames(msg_id, author_id)
+      # Find the users who are part of this private message.
+      # Found from the to_address of phpbb_privmsgs, by looking at
+      # all the rows with the same root_msg_id.
+      # to_address looks like this: "u_91:u_1234:u_200"
+      # The "u_" prefix is discarded and the rest is a user_id.
+      import_user_ids = @database.fetch_message_participants(msg_id, @settings.fix_private_messages)
+        .flat_map { |r| r[:to_address].split(':') } # flat_map never returns nil, unlike flatten!
+        .uniq.map { |u| u[2..-1] }
+
+      import_user_ids.map! do |import_user_id|
+        import_user_id.to_s == author_id.to_s ?
# Make sure the parent namespace exists so that this file can be loaded on its own.
module ImportScripts; end

module ImportScripts::PhpBB3
  # Converts a phpBB3 poll into the raw post text and the post custom fields
  # (poll definition and votes per user) used by the poll plugin.
  class PollImporter
    POLL_PLUGIN_NAME = 'poll'

    # @param lookup [ImportScripts::LookupContainer]
    # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
    # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
    def initialize(lookup, database, text_processor)
      @lookup = lookup
      @database = database
      @text_processor = text_processor

      # The constants are read from the singleton class of the plugin instance.
      poll_plugin = Discourse.plugins.find { |p| p.metadata.name == POLL_PLUGIN_NAME }.singleton_class
      @default_poll_name = poll_plugin.const_get(:DEFAULT_POLL_NAME)
      @polls_field = poll_plugin.const_get(:POLLS_CUSTOM_FIELD)
      @votes_field = poll_plugin.const_get(:VOTES_CUSTOM_FIELD)
    end

    # Builds the poll text and the custom fields for the poll of a topic.
    #
    # @param topic_id [Object] id of the imported topic, used for all database queries
    # @param poll [ImportScripts::PhpBB3::Poll]
    # @return [Hash, nil] hash with :raw and :custom_fields, or nil when the
    #   generated text does not contain a poll with the default poll name
    def map_poll(topic_id, poll)
      options = get_poll_options(topic_id)
      poll_text = get_poll_text(options, poll)
      extracted_poll = extract_default_poll(topic_id, poll_text)
      return nil if extracted_poll.nil?

      update_poll(extracted_poll, options, topic_id, poll)

      custom_fields = {}
      add_polls_field(custom_fields, extracted_poll)
      add_vote_fields(custom_fields, topic_id, poll)

      { raw: poll_text, custom_fields: custom_fields }
    end

    protected

    # Loads the options of a poll and merges options that have the same text.
    #
    # @return [Array<Hash>] one hash per distinct option text with the keys
    #   :ids (all imported option ids), :text and :votes (summed up)
    def get_poll_options(topic_id)
      options_by_text = {}

      @database.fetch_poll_options(topic_id).each do |row|
        option_text = @text_processor.process_raw_text(row[:poll_option_text]).delete("\n")

        # phpBB allows duplicate options (why?!) - we need to merge them
        option = (options_by_text[option_text] ||= { ids: [], text: option_text, votes: 0 })
        option[:ids] << row[:poll_option_id]
        option[:votes] += row[:poll_option_total]
      end

      options_by_text.values
    end

    # Creates the [poll] BBCode for the given options, preceded by the poll title.
    #
    # @param options [Array<Hash>] options as returned by #get_poll_options
    # @param poll [ImportScripts::PhpBB3::Poll]
    # @return [String]
    def get_poll_text(options, poll)
      poll_text = "#{poll.title}\n"

      if poll.max_options > 1
        poll_text << "[poll type=multiple max=#{poll.max_options}]"
      else
        poll_text << '[poll]'
      end

      options.each do |option|
        poll_text << "\n- #{option[:text]}"
      end

      poll_text << "\n[/poll]"
    end

    # Lets the poll plugin extract the polls from the text.
    #
    # @return [Hash, nil] the poll with the default poll name, or nil if there is none
    def extract_default_poll(topic_id, poll_text)
      extracted_polls = DiscoursePoll::Poll.extract(poll_text, topic_id)
      extracted_polls.find { |poll| poll['name'] == @default_poll_name }
    end

    # Copies the number of voters, the status and the votes per option into the extracted poll
    # and remembers which imported option ids belong to which extracted option.
    #
    # @param default_poll [Hash] poll as returned by #extract_default_poll
    # @param imported_options [Array<Hash>] options in the same order as in the poll text
    # @param poll [ImportScripts::PhpBB3::Poll]
    def update_poll(default_poll, imported_options, topic_id, poll)
      default_poll['voters'] = @database.count_voters(topic_id) # this includes anonymous voters

      # NOTE(review): Poll#has_ended? is true when the poll has no end or when its end
      # lies in the future, which is why a "true" results in an open poll here.
      default_poll['status'] = poll.has_ended? ? :open : :closed

      default_poll['options'].each_with_index do |option, index|
        imported_option = imported_options[index]
        option['votes'] = imported_option[:votes]
        poll.add_option_id(imported_option[:ids], option['id'])
      end
    end

    # Stores the poll definition in the custom fields.
    def add_polls_field(custom_fields, default_poll)
      custom_fields[@polls_field] = { @default_poll_name => default_poll }
    end

    # Adds one custom field per voter that contains the ids of the chosen options.
    # Votes for unknown options or by unknown users are skipped; a warning is
    # logged only once per topic.
    #
    # @param custom_fields [Hash]
    # @param poll [ImportScripts::PhpBB3::Poll]
    def add_vote_fields(custom_fields, topic_id, poll)
      rows = @database.fetch_poll_votes(topic_id)
      warned = false

      rows.each do |row|
        option_id = poll.option_id_from_imported_option_id(row[:poll_option_id])
        user_id = @lookup.user_id_from_imported_user_id(row[:user_id])

        if option_id.present? && user_id.present?
          key = "#{@votes_field}-#{user_id}"

          if custom_fields.key?(key)
            votes = custom_fields[key][@default_poll_name]
          else
            votes = []
            custom_fields[key] = { @default_poll_name => votes }
          end

          votes << option_id
        elsif !warned
          Rails.logger.warn("Topic with id #{topic_id} has invalid votes.")
          warned = true
        end
      end
    end
  end
end
module ImportScripts::PhpBB3
  # Holds the data of a phpBB3 poll and remembers which imported option ids
  # belong to which option id of the created poll.
  class Poll
    attr_reader :title, :max_options

    # @param title [String]
    # @param max_options [Integer] a value greater than 1 makes it a multiple choice poll
    # @param end_timestamp [Integer, nil] passed to Time.zone.at when present
    def initialize(title, max_options, end_timestamp)
      @option_ids = {}
      @title = title
      @max_options = max_options
      @end_timestamp = end_timestamp
    end

    # NOTE(review): despite its name this returns true when there is no end
    # timestamp or when the end lies in the future. PollImporter#update_poll
    # relies on exactly this behavior.
    def has_ended?
      return true if @end_timestamp.nil?

      Time.zone.at(@end_timestamp) > Time.now
    end

    # Registers the same new option id for every one of the imported option ids.
    def add_option_id(imported_ids, option_id)
      imported_ids.each do |imported_id|
        @option_ids[imported_id] = option_id
      end
    end

    # @return [Object, nil] the registered option id, nil for unknown imported ids
    def option_id_from_imported_option_id(imported_id)
      @option_ids[imported_id]
    end
  end
end

module ImportScripts::PhpBB3
  # Maps rows of phpBB3 posts to the hashes used for creating posts and topics.
  class PostImporter
    # @param lookup [ImportScripts::LookupContainer]
    # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
    # @param attachment_importer [ImportScripts::PhpBB3::AttachmentImporter]
    # @param poll_importer [ImportScripts::PhpBB3::PollImporter]
    # @param settings [ImportScripts::PhpBB3::Settings]
    def initialize(lookup, text_processor, attachment_importer, poll_importer, settings)
      @settings = settings
      @lookup = lookup
      @text_processor = text_processor
      @attachment_importer = attachment_importer
      @poll_importer = poll_importer
    end

    # @param row [Hash] row of a post including some columns of its topic
    # @return [Hash, nil] the mapped post; nil when the post is a reply and the
    #   first post of its topic can't be found
    def map_post(row)
      # Posts with a post_username are looked up by that name instead of the poster_id.
      imported_user_id = row[:post_username]
      imported_user_id = row[:poster_id] if row[:post_username].blank?
      user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || Discourse.system_user.id

      attachments = import_attachments(row, user_id)

      mapped = {
        id: row[:post_id],
        user_id: user_id,
        created_at: Time.zone.at(row[:post_time]),
        raw: @text_processor.process_post(row[:post_text], attachments)
      }

      return map_first_post(row, mapped) if row[:post_id] == row[:topic_first_post_id]

      map_other_post(row, mapped)
    end

    protected

    # @return [Object, nil] result of the attachment importer; nil when attachments
    #   are disabled or the post has none
    def import_attachments(row, user_id)
      return unless @settings.import_attachments && row[:post_attachment] > 0

      @attachment_importer.import_attachments(user_id, row[:post_id], row[:topic_id])
    end

    # Adds the topic related attributes (and the poll) to the first post of a topic.
    def map_first_post(row, mapped)
      topic_type = row[:topic_type]

      mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id])
      mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255]
      mapped[:pinned_at] = mapped[:created_at] if topic_type != Constants::POST_NORMAL
      mapped[:pinned_globally] = topic_type == Constants::POST_GLOBAL

      add_poll(row, mapped) if @settings.import_polls
      mapped
    end

    # Assigns a reply to the topic of the already imported first post.
    def map_other_post(row, mapped)
      first_post_id = row[:topic_first_post_id]
      parent = @lookup.topic_lookup_from_imported_post_id(first_post_id)

      unless parent.blank?
        mapped[:topic_id] = parent[:topic_id]
        return mapped
      end

      puts "Parent post #{row[:topic_first_post_id]} doesn't exist. Skipping #{row[:post_id]}: #{row[:topic_title][0..40]}"
      nil
    end

    # Puts the poll text in front of the post and adds the custom fields of the poll.
    def add_poll(row, mapped_post)
      return if row[:poll_title].blank?

      imported_poll = Poll.new(row[:poll_title], row[:poll_max_options], row[:poll_end])
      mapped_poll = @poll_importer.map_poll(row[:topic_id], imported_poll)
      return unless mapped_poll.present?

      mapped_post[:raw] = mapped_poll[:raw] << "\n" << mapped_post[:raw]
      mapped_post[:custom_fields] = mapped_poll[:custom_fields]
    end
  end
end
require_relative '../support/constants'

module ImportScripts::PhpBB3
  # Maps rows of phpBB3 users to the hashes used for creating users.
  class UserImporter
    # @param avatar_importer [ImportScripts::PhpBB3::AvatarImporter]
    # @param settings [ImportScripts::PhpBB3::Settings]
    def initialize(avatar_importer, settings)
      @avatar_importer = avatar_importer
      @settings = settings
    end

    # Maps a registered user. The returned :post_create_action suspends the
    # user if needed and imports the avatar.
    #
    # @param row [Hash] row of a user (including group and ban columns)
    # @return [Hash]
    def map_user(row)
      is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER

      {
        id: row[:user_id],
        email: row[:user_email],
        username: row[:username],
        name: @settings.username_as_name ? row[:username] : '',
        created_at: Time.zone.at(row[:user_regdate]),
        last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]),
        # best effort: values that can't be parsed as IP address are ignored
        registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil),
        active: is_active_user,
        trust_level: row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1],
        approved: is_active_user,
        approved_by_id: is_active_user ? Discourse.system_user.id : nil,
        approved_at: is_active_user ? Time.now : nil,
        moderator: row[:group_name] == Constants::GROUP_MODERATORS,
        admin: row[:group_name] == Constants::GROUP_ADMINISTRATORS,
        website: row[:user_website],
        location: row[:user_from],
        date_of_birth: parse_birthdate(row),
        post_create_action: proc do |user|
          suspend_user(user, row)
          @avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present?
        end
      }
    end

    # Maps an anonymous poster (identified only by the username stored in the post).
    # The returned :post_create_action suspends the created user and disables its emails.
    #
    # @param row [Hash] reads :post_username and :first_post_time
    # @return [Hash]
    def map_anonymous_user(row)
      username = row[:post_username]

      {
        id: username,
        email: "anonymous_no_email_#{username}",
        username: username,
        name: '',
        created_at: Time.zone.at(row[:first_post_time]),
        active: true,
        trust_level: TrustLevel[0],
        approved: true,
        approved_by_id: Discourse.system_user.id,
        approved_at: Time.now,
        post_create_action: proc do |user|
          # Work on a copy - the row handed in by the caller must not be modified.
          ban_row = row.merge(
            user_inactive_reason: Constants::INACTIVE_MANUAL,
            ban_reason: 'Anonymous user from phpBB3' # TODO i18n
          )
          suspend_user(user, ban_row, true)
        end
      }
    end

    protected

    # @return [Date, nil] nil when the birthday is blank or can't be parsed
    def parse_birthdate(row)
      return nil if row[:user_birthday].blank?
      Date.strptime(row[:user_birthday].delete(' '), '%d-%m-%Y') rescue nil
    end

    # Suspends the user if it is currently banned.
    #
    # @param user [User] the created user
    # @param row [Hash] reads :user_inactive_reason, :ban_start, :ban_end and :ban_reason
    # @param disable_email [Boolean] also switches off all emails for the user
    def suspend_user(user, row, disable_email = false)
      if row[:user_inactive_reason] == Constants::INACTIVE_MANUAL
        user.suspended_at = Time.now
        user.suspended_till = 200.years.from_now
        ban_reason = row[:ban_reason].blank? ? 'Account deactivated by administrator' : row[:ban_reason] # TODO i18n
      elsif row[:ban_start].present?
        user.suspended_at = Time.zone.at(row[:ban_start])
        # a missing ban_end is handled like 0
        user.suspended_till = row[:ban_end].to_i > 0 ? Time.zone.at(row[:ban_end]) : 200.years.from_now
        ban_reason = row[:ban_reason]
      else
        return
      end

      if disable_email
        user.email_digests = false
        user.email_private_messages = false
        user.email_direct = false
        user.email_always = false
      end

      if user.save
        StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
      else
        Rails.logger.error("Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}")
      end
    end
  end
end
+ +database: + type: MySQL # currently only MySQL is supported - more to come soon + host: localhost + username: root + password: + schema: phpbb + table_prefix: phpbb # Usually all table names start with phpbb. Change this, if your forum is using a different prefix. + batch_size: 1000 # Don't change this unless you know what you're doing. The default (1000) should work just fine. + +import: + # Enable this option if you want to have a better conversion of BBCodes to Markdown. + # WARNING: This can slow down your import. + use_bbcode_to_md: false + + # This is the path to the root directory of your current phpBB installation (or a copy of it). + # The importer expects to find the /files and /images directories within the base directory. + # This is only needed if you want to import avatars, attachments or custom smilies. + phpbb_base_dir: /var/www/phpbb + + site_prefix: + # this is needed for rewriting internal links in posts + original: oldsite.example.com/forums # without http(s):// + new: http://discourse.example.com # with http:// or https:// + + avatars: + uploaded: true # import uploaded avatars + gallery: true # import the predefined avatars phpBB offers + remote: false # WARNING: This can considerably slow down your import. It will try to download remote avatars. + + # When true: Anonymous users are imported as suspended users. They can't login and have no email address. + # When false: The system user will be used for all anonymous users. + anonymous_users: true + + # By default all the following things get imported. You can disable them by setting them to false. + bookmarks: true + attachments: true + private_messages: true + polls: true + + # This tries to fix Private Messages that were imported from phpBB2 to phpBB3. + # You should enable this option if you see duplicate messages or lots of related + # messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer' + # should be one topic named 'Importer' and consist of 3 posts). 
# Make sure the parent namespace exists so that this code can be loaded on its own.
module ImportScripts; end

module ImportScripts::PhpBB3
  # Values used by the phpBB3 database that the importer compares against.
  class Constants
    ACTIVE_USER = 0
    INACTIVE_REGISTER = 1 # Newly registered account
    INACTIVE_PROFILE = 2 # Profile details changed
    INACTIVE_MANUAL = 3 # Account deactivated by administrator
    INACTIVE_REMIND = 4 # Forced user account reactivation

    GROUP_ADMINISTRATORS = 'ADMINISTRATORS'
    GROUP_MODERATORS = 'GLOBAL_MODERATORS'

    # https://wiki.phpbb.com/Table.phpbb_users
    USER_TYPE_NORMAL = 0
    USER_TYPE_INACTIVE = 1
    USER_TYPE_IGNORE = 2
    USER_TYPE_FOUNDER = 3

    AVATAR_TYPE_UPLOADED = 1
    AVATAR_TYPE_REMOTE = 2
    AVATAR_TYPE_GALLERY = 3

    FORUM_TYPE_CATEGORY = 0
    FORUM_TYPE_POST = 1
    FORUM_TYPE_LINK = 2

    TOPIC_UNLOCKED = 0
    TOPIC_LOCKED = 1
    TOPIC_MOVED = 2

    POST_NORMAL = 0
    POST_STICKY = 1
    POST_ANNOUNCE = 2
    POST_GLOBAL = 3
  end
end

require 'yaml'

module ImportScripts::PhpBB3
  # Read-only access to the values of the importer's YAML settings file.
  class Settings
    # @param filename [String] path of the YAML settings file
    # @return [Settings]
    def self.load(filename)
      yaml = YAML.load_file(filename)
      Settings.new(yaml)
    end

    attr_reader :import_anonymous_users
    attr_reader :import_attachments
    attr_reader :import_private_messages
    attr_reader :import_polls
    attr_reader :import_bookmarks

    attr_reader :import_uploaded_avatars
    attr_reader :import_remote_avatars
    attr_reader :import_gallery_avatars

    attr_reader :fix_private_messages
    attr_reader :use_bbcode_to_md

    attr_reader :original_site_prefix
    attr_reader :new_site_prefix
    attr_reader :base_dir

    attr_reader :username_as_name
    attr_reader :emojis

    attr_reader :database

    # @param yaml [Hash] parsed settings; reads the 'import' and 'database' sections
    def initialize(yaml)
      import_settings = yaml['import']
      @import_anonymous_users = import_settings['anonymous_users']
      @import_attachments = import_settings['attachments']
      @import_private_messages = import_settings['private_messages']
      @import_polls = import_settings['polls']
      @import_bookmarks = import_settings['bookmarks']

      avatar_settings = import_settings['avatars']
      @import_uploaded_avatars = avatar_settings['uploaded']
      @import_remote_avatars = avatar_settings['remote']
      @import_gallery_avatars = avatar_settings['gallery']

      @fix_private_messages = import_settings['fix_private_messages']
      @use_bbcode_to_md = import_settings['use_bbcode_to_md']

      @original_site_prefix = import_settings['site_prefix']['original']
      @new_site_prefix = import_settings['site_prefix']['new']
      @base_dir = import_settings['phpbb_base_dir']

      @username_as_name = import_settings['username_as_name']

      # The key can exist without a value (an "emojis:" entry without any mappings),
      # so a default given to fetch isn't enough. The mappings are key/value pairs.
      @emojis = import_settings['emojis'] || {}

      @database = DatabaseSettings.new(yaml['database'])
    end
  end

  # Read-only access to the 'database' section of the settings file.
  class DatabaseSettings
    attr_reader :type
    attr_reader :host
    attr_reader :username
    attr_reader :password
    attr_reader :schema
    attr_reader :table_prefix
    attr_reader :batch_size

    # @param yaml [Hash] the parsed 'database' section
    def initialize(yaml)
      @type = yaml['type']
      @host = yaml['host']
      @username = yaml['username']
      @password = yaml['password']
      @schema = yaml['schema']
      @table_prefix = yaml['table_prefix']
      @batch_size = yaml['batch_size']
    end
  end
end
# Make sure the parent namespace exists so that this file can be loaded on its own.
module ImportScripts; end

module ImportScripts::PhpBB3
  # Replaces the smilies of phpBB3 with emojis. Smilies without a mapping are
  # uploaded as images; if that fails the smiley code is used as plain text.
  class SmileyProcessor
    # :) is encoded as
    # <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
    # The groups are: smiley code, file path, alt text and title.
    # NOTE(review): the markup was reconstructed from the way phpBB 3.0 stores smilies
    # in the post text - verify it against real data before running an import.
    SMILEY_REGEXP = /<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/

    DEFAULT_SMILIES = {
      [':D', ':-D', ':grin:'] => ':smiley:',
      [':)', ':-)', ':smile:'] => ':smile:',
      [';)', ';-)', ':wink:'] => ':wink:',
      [':(', ':-(', ':sad:'] => ':frowning:',
      [':o', ':-o', ':eek:'] => ':astonished:',
      [':shock:'] => ':open_mouth:',
      [':?', ':-?', ':???:'] => ':confused:',
      ['8-)', ':cool:'] => ':sunglasses:',
      [':lol:'] => ':laughing:',
      [':x', ':-x', ':mad:'] => ':angry:',
      [':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
      [':oops:'] => ':blush:',
      [':cry:'] => ':cry:',
      [':evil:'] => ':imp:',
      [':twisted:'] => ':smiling_imp:',
      [':roll:'] => ':unamused:',
      [':!:'] => ':exclamation:',
      [':?:'] => ':question:',
      [':idea:'] => ':bulb:',
      [':arrow:'] => ':arrow_right:',
      [':|', ':-|'] => ':neutral_face:'
    }.freeze

    # @param uploader [ImportScripts::Uploader]
    # @param settings [ImportScripts::PhpBB3::Settings]
    # @param phpbb_config [Hash] reads :smilies_path (relative to the base directory)
    def initialize(uploader, settings, phpbb_config)
      @uploader = uploader
      @smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])

      @smiley_map = {}
      add_default_smilies
      add_configured_smilies(settings.emojis)
    end

    # Replaces all encoded smilies within the text. The text is modified in place.
    #
    # @param text [String]
    # @return [String, nil] the text, or nil when it doesn't contain any smilies (see String#gsub!)
    def replace_smilies(text)
      text.gsub!(SMILEY_REGEXP) do
        smiley, path, alt_text, title = Regexp.last_match.captures

        @smiley_map.fetch(smiley) do
          upload_smiley(smiley, path, alt_text, title) || smiley_as_text(smiley)
        end
      end
    end

    protected

    def add_default_smilies
      DEFAULT_SMILIES.each do |smilies, emoji|
        smilies.each { |smiley| @smiley_map[smiley] = emoji }
      end
    end

    # Adds the mappings from the settings file. They override the default mappings.
    #
    # @param emojis [Hash, nil] emoji name => smiley or list of smilies
    def add_configured_smilies(emojis)
      (emojis || {}).each do |emoji, smilies|
        Array(smilies).each { |smiley| @smiley_map[smiley] = ":#{emoji}:" }
      end
    end

    # Uploads the image of a smiley and remembers the resulting HTML for the smiley.
    #
    # @return [String, nil] HTML of the embedded image, nil when the upload failed
    def upload_smiley(smiley, path, alt_text, title)
      path = File.join(@smilies_path, path)
      filename = File.basename(path)
      upload = @uploader.create_upload(Discourse::SYSTEM_USER_ID, path, filename)

      if upload.nil? || !upload.valid?
        puts "Failed to upload #{path}"
        puts upload.errors.inspect if upload
        html = nil
      else
        html = embedded_image_html(upload, alt_text, title)
        @smiley_map[smiley] = html
      end

      html
    end

    # NOTE(review): the image tag was reconstructed (the markup was missing in the
    # reviewed source) - confirm the attributes, especially the use of upload.url.
    def embedded_image_html(upload, alt_text, title)
      image_width = [upload.width, SiteSetting.max_image_width].compact.min
      image_height = [upload.height, SiteSetting.max_image_height].compact.min
      %Q[<img src="#{upload.url}" width="#{image_width}" height="#{image_height}" alt="#{alt_text}" title="#{title}"/>]
    end

    # Remembers that the smiley should be shown as its code and returns the code.
    def smiley_as_text(smiley)
      @smiley_map[smiley] = smiley
    end
  end
end
# Make sure the parent namespace exists so that this file can be loaded on its own.
module ImportScripts; end

module ImportScripts::PhpBB3
  # Converts the text of phpBB3 posts and private messages: removes the hashes of BBCodes,
  # replaces smilies, rewrites links and lists and embeds attachments.
  class TextProcessor
    # Matches [attachment=<index>]<filename>[/attachment]. The index can have more than one digit.
    # NOTE(review): phpBB seems to wrap the filename in HTML comments like <!-- ia0 -->;
    # they are accepted but not required - verify against real data.
    ATTACHMENT_REGEXP = /\[attachment=(\d+)\](?:<!-- \w+ -->)?([^<]+)(?:<!-- \w+ -->)?\[\/attachment\]?/i

    # Some links look like this:
    # <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
    # NOTE(review): reconstructed from phpBB 3.0's stored markup (the pattern was
    # incomplete in the reviewed source) - verify against real data.
    EXTERNAL_LINK_REGEXP = /<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i

    # @param lookup [ImportScripts::LookupContainer]
    # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
    # @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
    # @param settings [ImportScripts::PhpBB3::Settings]
    def initialize(lookup, database, smiley_processor, settings)
      @lookup = lookup
      @database = database
      @smiley_processor = smiley_processor

      @new_site_prefix = settings.new_site_prefix
      create_internal_link_regexps(settings.original_site_prefix)
    end

    # Converts a text without handling attachments. The given string isn't modified.
    #
    # @param raw [String]
    # @return [String]
    def process_raw_text(raw)
      text = CGI.unescapeHTML(raw.dup)

      clean_bbcodes(text)
      process_smilies(text)
      process_links(text)
      process_lists(text)

      text
    end

    # Converts the text of a post and embeds its attachments.
    #
    # @param raw [String]
    # @param attachments [Array<String>, nil] HTML of the attachments, ordered by their index
    # @return [String]
    def process_post(raw, attachments)
      text = process_raw_text(raw)
      text = process_attachments(text, attachments) if attachments.present?
      text
    end

    # Converts the text of a private message. It is handled exactly like a post.
    def process_private_msg(raw, attachments)
      process_post(raw, attachments)
    end

    protected

    def clean_bbcodes(text)
      # Many phpbb bbcode tags have a hash attached to them. Examples:
      # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky]
      # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex]
      text.gsub!(/:(?:\w{8})\]/, ']')
    end

    def process_smilies(text)
      @smiley_processor.replace_smilies(text)
    end

    def process_links(text)
      # Internal forum links can have these forms (wrapped in <!-- l --><a ...>...</a><!-- l -->):
      # for topics: viewtopic.php?f=26&t=3412
      # for posts: viewtopic.php?p=1732#p1732
      text.gsub!(@long_internal_link_regexp) do |link|
        replace_internal_link(link, Regexp.last_match(1), Regexp.last_match(2))
      end

      text.gsub!(EXTERNAL_LINK_REGEXP, '[\2](\1)')

      # Replace internal forum links that aren't in the <!-- l --> format
      text.gsub!(@short_internal_link_regexp) do |link|
        replace_internal_link(link, Regexp.last_match(1), Regexp.last_match(2))
      end

      # phpBB shortens link text like this, which breaks our markdown processing:
      # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
      #
      # Work around it for now:
      text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
    end

    # @param link [String] the matched text; it is returned when the link can't be replaced
    # @param import_topic_id [String, nil] only used when there's no post id
    # @param import_post_id [String, nil]
    def replace_internal_link(link, import_topic_id, import_post_id)
      if import_post_id.nil?
        replace_internal_topic_link(link, import_topic_id)
      else
        replace_internal_post_link(link, import_post_id)
      end
    end

    # A link to a topic is handled like a link to the first post of the topic.
    def replace_internal_topic_link(link, import_topic_id)
      import_post_id = @database.get_first_post_id(import_topic_id)
      return link if import_post_id.nil?

      replace_internal_post_link(link, import_post_id)
    end

    def replace_internal_post_link(link, import_post_id)
      topic = @lookup.topic_lookup_from_imported_post_id(import_post_id)
      topic ? "#{@new_site_prefix}#{topic[:url]}" : link
    end

    def process_lists(text)
      # convert list tags to ul and list=1 tags to ol
      # list=a is not supported, so handle it like list=1
      # list=9 and list=x have the same result as list=1 and list=a
      text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
      text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')

      # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
      text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
    end

    # This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse.
    # All attachments that haven't been referenced in the text are appended to the end of the text.
    #
    # @param text [String]
    # @param attachments [Array<String>]
    # @return [String] a new string
    def process_attachments(text, attachments)
      unreferenced_attachments = attachments.dup

      text = text.gsub(ATTACHMENT_REGEXP) do
        index = Regexp.last_match(1).to_i
        real_filename = Regexp.last_match(2)
        unreferenced_attachments[index] = nil
        # an unknown index is replaced by the filename stored in the text
        attachments.fetch(index, real_filename)
      end

      unreferenced_attachments = unreferenced_attachments.compact
      text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
      text
    end

    # Creates the regular expressions for links to topics and posts of the old forum.
    # Both have two groups: the topic id and the post id (only one of them is set).
    #
    # @param original_site_prefix [String] host and path of the old forum without protocol
    def create_internal_link_regexps(original_site_prefix)
      host = Regexp.escape(original_site_prefix)
      link_regex = "http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)"

      # NOTE(review): the markup around the link was reconstructed from phpBB 3.0's
      # stored format for local links - verify against real data.
      @long_internal_link_regexp = Regexp.new(%Q|<!-- l --><a(?:.+)href="#{link_regex}"(?:.*)</a><!-- l -->|, Regexp::IGNORECASE)
      @short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE)
    end
  end
end