From 2dd01c61b0eed1c4bbb20872098931651387cbdf Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Mon, 4 May 2015 23:09:58 +0200 Subject: [PATCH 1/2] Improves the base importer - Move some methods into their own classes in order to make it easier to reuse them outside of classes extending the base importer. For compatibility reasons, the old methods are still in the base importer and delegate to the new objects. The following methods and hashes were extracted: - all the lookup maps for existing and imported data - all the methods used for uploads and attachments - No need to store failed users and groups. This information wasn't used anyway. - Print progress instead of category names when importing categories. - Allow importers to override whether bbcode_to_md should be used (until now it always used ARGV) - Allow importers to add additional site settings that automatically get restored after the importer finishes. - Show how many posts and messages are imported per minute. This should help detect when the import is slowing down and needs to be restarted. - Use max_image_width and max_image_height from settings instead of hard-coded values for uploaded images. 
--- script/import_scripts/base.rb | 254 ++++++++---------- .../import_scripts/base/lookup_container.rb | 99 +++++++ script/import_scripts/base/uploader.rb | 45 ++++ 3 files changed, 261 insertions(+), 137 deletions(-) create mode 100644 script/import_scripts/base/lookup_container.rb create mode 100644 script/import_scripts/base/uploader.rb diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb index 093875493cd..8a3d30225eb 100644 --- a/script/import_scripts/base.rb +++ b/script/import_scripts/base.rb @@ -7,13 +7,13 @@ if ARGV.include?('bbcode-to-md') # git clone https://github.com/nlalonde/ruby-bbcode-to-md.git # cd ruby-bbcode-to-md # gem build ruby-bbcode-to-md.gemspec - # gem install ruby-bbcode-to-md-0.0.13.gem + # gem install ruby-bbcode-to-md-*.gem require 'ruby-bbcode-to-md' end require_relative '../../config/environment' -require_dependency 'url_helper' -require_dependency 'file_helper' +require_relative 'base/lookup_container' +require_relative 'base/uploader' module ImportScripts; end @@ -24,46 +24,13 @@ class ImportScripts::Base def initialize preload_i18n - @bbcode_to_md = true if ARGV.include?('bbcode-to-md') - @existing_groups = {} - @failed_groups = [] - @existing_users = {} - @failed_users = [] - @categories_lookup = {} - @existing_posts = {} - @topic_lookup = {} - @site_settings_during_import = nil + @lookup = ImportScripts::LookupContainer.new + @uploader = ImportScripts::Uploader.new + + @bbcode_to_md = true if use_bbcode_to_md? + @site_settings_during_import = {} @old_site_settings = {} - @start_time = Time.now - - puts "loading existing groups..." - GroupCustomField.where(name: 'import_id').pluck(:group_id, :value).each do |group_id, import_id| - @existing_groups[import_id] = group_id - end - - puts "loading existing users..." - UserCustomField.where(name: 'import_id').pluck(:user_id, :value).each do |user_id, import_id| - @existing_users[import_id] = user_id - end - - puts "loading existing categories..." 
- CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value).each do |category_id, import_id| - @categories_lookup[import_id] = category_id - end - - puts "loading existing posts..." - PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id| - @existing_posts[import_id] = post_id - end - - puts "loading existing topics..." - Post.joins(:topic).pluck("posts.id, posts.topic_id, posts.post_number, topics.slug").each do |p| - @topic_lookup[p[0]] = { - topic_id: p[1], - post_number: p[2], - url: Post.url(p[3], p[1], p[2]), - } - end + @start_times = {import: Time.now} end def preload_i18n @@ -87,15 +54,15 @@ class ImportScripts::Base update_topic_count_replies reset_topic_counters - elapsed = Time.now - @start_time - puts '', "Done (#{elapsed.to_s} seconds)" + elapsed = Time.now - @start_times[:import] + puts '', '', 'Done (%02dh %02dmin %02dsec)' % [elapsed/3600, elapsed/60%60, elapsed%60] ensure reset_site_settings end - def change_site_settings - @site_settings_during_import = { + def get_site_settings_for_import + { email_domains_blacklist: '', min_topic_title_length: 1, min_post_length: 1, @@ -106,6 +73,10 @@ class ImportScripts::Base disable_emails: true, authorized_extensions: '*' } + end + + def change_site_settings + @site_settings_during_import = get_site_settings_for_import @site_settings_during_import.each do |key, value| @old_site_settings[key] = SiteSetting.send(key) @@ -124,44 +95,42 @@ class ImportScripts::Base RateLimiter.enable end + def use_bbcode_to_md? + ARGV.include?("bbcode-to-md") + end + # Implementation will do most of its work in its execute method. # It will need to call create_users, create_categories, and create_posts. 
def execute raise NotImplementedError end - # Get the Discourse Post id based on the id of the source record def post_id_from_imported_post_id(import_id) - @existing_posts[import_id] || @existing_posts[import_id.to_s] + @lookup.post_id_from_imported_post_id(import_id) end - # Get the Discourse topic info (a hash) based on the id of the source record def topic_lookup_from_imported_post_id(import_id) - post_id = post_id_from_imported_post_id(import_id) - post_id ? @topic_lookup[post_id] : nil + @lookup.topic_lookup_from_imported_post_id(import_id) end - # Get the Discourse Group id based on the id of the source group def group_id_from_imported_group_id(import_id) - @existing_groups[import_id] || @existing_groups[import_id.to_s] || find_group_by_import_id(import_id).try(:id) + @lookup.group_id_from_imported_group_id(import_id) end def find_group_by_import_id(import_id) - GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group) + @lookup.find_group_by_import_id(import_id) end - # Get the Discourse User id based on the id of the source user def user_id_from_imported_user_id(import_id) - @existing_users[import_id] || @existing_users[import_id.to_s] || find_user_by_import_id(import_id).try(:id) + @lookup.user_id_from_imported_user_id(import_id) end def find_user_by_import_id(import_id) - UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user) + @lookup.find_user_by_import_id(import_id) end - # Get the Discourse Category id based on the id of the source category def category_id_from_imported_category_id(import_id) - @categories_lookup[import_id] || @categories_lookup[import_id.to_s] + @lookup.category_id_from_imported_category_id(import_id) end def create_admin(opts={}) @@ -183,31 +152,32 @@ class ImportScripts::Base # group in the original datasource. The given id will not be used # to create the Discourse group record. 
def create_groups(results, opts={}) - groups_created = 0 - groups_skipped = 0 + created = 0 + skipped = 0 + failed = 0 total = opts[:total] || results.size results.each do |result| g = yield(result) - if group_id_from_imported_group_id(g[:id]) - groups_skipped += 1 + if @lookup.group_id_from_imported_group_id(g[:id]) + skipped += 1 else new_group = create_group(g, g[:id]) if new_group.valid? - @existing_groups[g[:id].to_s] = new_group.id - groups_created += 1 + @lookup.add_group(g[:id].to_s, new_group) + created += 1 else - @failed_groups << g + failed += 1 puts "Failed to create group id #{g[:id]} #{new_group.name}: #{new_group.errors.full_messages}" end end - print_status groups_created + groups_skipped + @failed_groups.length + (opts[:offset] || 0), total + print_status created + skipped + failed + (opts[:offset] || 0), total end - return [groups_created, groups_skipped] + [created, skipped] end def create_group(opts, import_id) @@ -231,8 +201,9 @@ class ImportScripts::Base # user in the original datasource. The given id will not be used to # create the Discourse user record. def create_users(results, opts={}) - users_created = 0 - users_skipped = 0 + created = 0 + skipped = 0 + failed = 0 total = opts[:total] || results.size results.each do |result| @@ -240,34 +211,34 @@ class ImportScripts::Base # block returns nil to skip a user if u.nil? - users_skipped += 1 + skipped += 1 else import_id = u[:id] - if user_id_from_imported_user_id(import_id) - users_skipped += 1 + if @lookup.user_id_from_imported_user_id(import_id) + skipped += 1 elsif u[:email].present? new_user = create_user(u, import_id) if new_user.valid? && new_user.user_profile.valid? 
- @existing_users[import_id.to_s] = new_user.id - users_created += 1 + @lookup.add_user(import_id.to_s, new_user) + created += 1 else - @failed_users << u + failed += 1 puts "Failed to create user id: #{import_id}, username: #{new_user.username}, email: #{new_user.email}" puts "user errors: #{new_user.errors.full_messages}" puts "user_profile errors: #{new_user.user_profiler.errors.full_messages}" end else - @failed_users << u + failed += 1 puts "Skipping user id #{import_id} because email is blank" end end - print_status users_created + users_skipped + @failed_users.length + (opts[:offset] || 0), total + print_status created + skipped + failed + (opts[:offset] || 0), total end - return [users_created, users_skipped] + [created, skipped] end def create_user(opts, import_id) @@ -334,29 +305,39 @@ class ImportScripts::Base # create the Discourse category record. # Optional attributes are position, description, and parent_category_id. def create_categories(results) + created = 0 + skipped = 0 + total = results.size + results.each do |c| params = yield(c) # block returns nil to skip - next if params.nil? || category_id_from_imported_category_id(params[:id]) + if params.nil? || @lookup.category_id_from_imported_category_id(params[:id]) + skipped += 1 + else + # Basic massaging on the category name + params[:name] = "Blank" if params[:name].blank? + params[:name].strip! + params[:name] = params[:name][0..49] - # Basic massaging on the category name - params[:name] = "Blank" if params[:name].blank? - params[:name].strip! - params[:name] = params[:name][0..49] + # make sure categories don't go more than 2 levels deep + if params[:parent_category_id] + top = Category.find_by_id(params[:parent_category_id]) + top = top.parent_category while top && !top.parent_category.nil? 
+ params[:parent_category_id] = top.id if top + end - puts "\t#{params[:name]}" + new_category = create_category(params, params[:id]) + @lookup.add_category(params[:id], new_category) - # make sure categories don't go more than 2 levels deep - if params[:parent_category_id] - top = Category.find_by_id(params[:parent_category_id]) - top = top.parent_category while top && !top.parent_category.nil? - params[:parent_category_id] = top.id if top + created += 1 end - new_category = create_category(params, params[:id]) - @categories_lookup[params[:id]] = new_category.id + print_status created + skipped, total end + + [created, skipped] end def create_category(opts, import_id) @@ -396,6 +377,7 @@ class ImportScripts::Base skipped = 0 created = 0 total = opts[:total] || results.size + start_time = get_start_time("posts-#{total}") # the post count should be unique enough to differentiate between posts and PMs results.each do |r| params = yield(r) @@ -406,18 +388,14 @@ class ImportScripts::Base else import_id = params.delete(:id).to_s - if post_id_from_imported_post_id(import_id) + if @lookup.post_id_from_imported_post_id(import_id) skipped += 1 # already imported this post else begin new_post = create_post(params, import_id) if new_post.is_a?(Post) - @existing_posts[import_id] = new_post.id - @topic_lookup[new_post.id] = { - post_number: new_post.post_number, - topic_id: new_post.topic_id, - url: new_post.url, - } + @lookup.add_post(import_id, new_post) + @lookup.add_topic(new_post) created_post(new_post) @@ -439,10 +417,10 @@ class ImportScripts::Base end end - print_status skipped + created + (opts[:offset] || 0), total + print_status(created + skipped + (opts[:offset] || 0), total, start_time) end - return [created, skipped] + [created, skipped] end def create_post(opts, import_id) @@ -463,19 +441,8 @@ class ImportScripts::Base post ? post : post_creator.errors.full_messages end - # Creates an upload. - # Expects path to be the full path and filename of the source file. 
def create_upload(user_id, path, source_filename) - tmp = Tempfile.new('discourse-upload') - src = File.open(path) - FileUtils.copy_stream(src, tmp) - src.close - tmp.rewind - - Upload.create_for(user_id, tmp, source_filename, tmp.size) - ensure - tmp.close rescue nil - tmp.unlink rescue nil + @uploader.create_upload(user_id, path, source_filename) end # Iterate through a list of bookmark records to be imported. @@ -484,8 +451,8 @@ class ImportScripts::Base # Required fields are :user_id and :post_id, where both ids are # the values in the original datasource. def create_bookmarks(results, opts={}) - bookmarks_created = 0 - bookmarks_skipped = 0 + created = 0 + skipped = 0 total = opts[:total] || results.size user = User.new @@ -495,23 +462,29 @@ class ImportScripts::Base params = yield(result) # only the IDs are needed, so this should be enough - user.id = user_id_from_imported_user_id(params[:user_id]) - post.id = post_id_from_imported_post_id(params[:post_id]) - - if user.id.nil? || post.id.nil? - bookmarks_skipped += 1 - puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}" + if params.nil? + skipped += 1 else - begin - PostAction.act(user, post, PostActionType.types[:bookmark]) - bookmarks_created += 1 - rescue PostAction::AlreadyActed - bookmarks_skipped += 1 - end + user.id = @lookup.user_id_from_imported_user_id(params[:user_id]) + post.id = @lookup.post_id_from_imported_post_id(params[:post_id]) - print_status bookmarks_created + bookmarks_skipped + (opts[:offset] || 0), total + if user.id.nil? || post.id.nil? 
+ skipped += 1 + puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}" + else + begin + PostAction.act(user, post, PostActionType.types[:bookmark]) + created += 1 + rescue PostAction::AlreadyActed + skipped += 1 + end + end end + + print_status created + skipped + (opts[:offset] || 0), total end + + [created, skipped] end def close_inactive_topics(opts={}) @@ -633,23 +606,26 @@ class ImportScripts::Base end def html_for_upload(upload, display_filename) - if FileHelper.is_image?(upload.url) - embedded_image_html(upload) - else - attachment_html(upload, display_filename) - end + @uploader.html_for_upload(upload, display_filename) end def embedded_image_html(upload) - %Q[
] + @uploader.embedded_image_html(upload) end def attachment_html(upload, display_filename) - "#{display_filename} (#{number_to_human_size(upload.filesize)})" + @uploader.attachment_html(upload, display_filename) end - def print_status(current, max) - print "\r%9d / %d (%5.1f%%) " % [current, max, ((current.to_f / max.to_f) * 100).round(1)] + def print_status(current, max, start_time = nil) + if start_time.present? + elapsed_seconds = Time.now - start_time + elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60] + else + elements_per_minute = '' + end + + print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute] end def print_spinner @@ -658,6 +634,10 @@ class ImportScripts::Base print "\b#{@spinner_chars[0]}" end + def get_start_time(key) + @start_times.fetch(key) {|k| @start_times[k] = Time.now} + end + def batches(batch_size) offset = 0 loop do diff --git a/script/import_scripts/base/lookup_container.rb b/script/import_scripts/base/lookup_container.rb new file mode 100644 index 00000000000..0d8070932ae --- /dev/null +++ b/script/import_scripts/base/lookup_container.rb @@ -0,0 +1,99 @@ +module ImportScripts + class LookupContainer + def initialize + puts 'loading existing groups...' + @groups = {} + GroupCustomField.where(name: 'import_id').pluck(:group_id, :value).each do |group_id, import_id| + @groups[import_id] = group_id + end + + puts 'loading existing users...' + @users = {} + UserCustomField.where(name: 'import_id').pluck(:user_id, :value).each do |user_id, import_id| + @users[import_id] = user_id + end + + puts 'loading existing categories...' + @categories = {} + CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value).each do |category_id, import_id| + @categories[import_id] = category_id + end + + puts 'loading existing posts...' 
+ @posts = {} + PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id| + @posts[import_id] = post_id + end + + puts 'loading existing topics...' + @topics = {} + Post.joins(:topic).pluck('posts.id, posts.topic_id, posts.post_number, topics.slug').each do |p| + @topics[p[0]] = { + topic_id: p[1], + post_number: p[2], + url: Post.url(p[3], p[1], p[2]) + } + end + end + + # Get the Discourse Post id based on the id of the source record + def post_id_from_imported_post_id(import_id) + @posts[import_id] || @posts[import_id.to_s] + end + + # Get the Discourse topic info (a hash) based on the id of the source record + def topic_lookup_from_imported_post_id(import_id) + post_id = post_id_from_imported_post_id(import_id) + post_id ? @topics[post_id] : nil + end + + # Get the Discourse Group id based on the id of the source group + def group_id_from_imported_group_id(import_id) + @groups[import_id] || @groups[import_id.to_s] || find_group_by_import_id(import_id).try(:id) + end + + # Get the Discourse Group based on the id of the source group + def find_group_by_import_id(import_id) + GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group) + end + + # Get the Discourse User id based on the id of the source user + def user_id_from_imported_user_id(import_id) + @users[import_id] || @users[import_id.to_s] || find_user_by_import_id(import_id).try(:id) + end + + # Get the Discourse User based on the id of the source user + def find_user_by_import_id(import_id) + UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user) + end + + # Get the Discourse Category id based on the id of the source category + def category_id_from_imported_category_id(import_id) + @categories[import_id] || @categories[import_id.to_s] + end + + def add_group(import_id, group) + @groups[import_id] = group.id + end + + def add_user(import_id, user) + @users[import_id] = user.id + end + + def add_category(import_id, 
category) + @categories[import_id] = category.id + end + + def add_post(import_id, post) + @posts[import_id] = post.id + end + + def add_topic(post) + @topics[post.id] = { + post_number: post.post_number, + topic_id: post.topic_id, + url: post.url, + } + end + end +end diff --git a/script/import_scripts/base/uploader.rb b/script/import_scripts/base/uploader.rb new file mode 100644 index 00000000000..62ddac451d7 --- /dev/null +++ b/script/import_scripts/base/uploader.rb @@ -0,0 +1,45 @@ +require_dependency 'url_helper' +require_dependency 'file_helper' + +module ImportScripts + class Uploader + include ActionView::Helpers::NumberHelper + + # Creates an upload. + # Expects path to be the full path and filename of the source file. + # @return [Upload] + def create_upload(user_id, path, source_filename) + tmp = Tempfile.new('discourse-upload') + src = File.open(path) + FileUtils.copy_stream(src, tmp) + src.close + tmp.rewind + + Upload.create_for(user_id, tmp, source_filename, tmp.size) + rescue => e + Rails.logger.error("Failed to create upload: #{e}") + nil + ensure + tmp.close rescue nil + tmp.unlink rescue nil + end + + def html_for_upload(upload, display_filename) + if FileHelper.is_image?(upload.url) + embedded_image_html(upload) + else + attachment_html(upload, display_filename) + end + end + + def embedded_image_html(upload) + image_width = [upload.width, SiteSetting.max_image_width].compact.min + image_height = [upload.height, SiteSetting.max_image_height].compact.min + %Q[
] + end + + def attachment_html(upload, display_filename) + "#{display_filename} (#{number_to_human_size(upload.filesize)})" + end + end +end From 1cb45861c5d1ee5c86b58d950e76d754d8423392 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Sun, 5 Jul 2015 23:17:03 +0200 Subject: [PATCH 2/2] FEATURE: Lots of improvements to the phpBB3 importer - Extensive refactoring of the existing importer - Configuration of import with settings.yml instead of editing code - Supports importing from phpBB 3.0.x and 3.1.x - Imports all attachments (not just the ones embedded with [attachment]) from posts and private messages - Imports all existing attachments without the need to configure allowed file extensions or file sizes - Imports polls - Imports bookmarks - Imports sticky topics and (global) announcements as pinned topics - Imports categories in the original order and sets the content of the category description topic - Sets the creation date of category description topics to the creation date of the first topic in each category - Imports additional user attributes: last seen date, registration IP address, website, date of birth, location - Optionally set the user's name to its username - Users that didn't activate their account in phpBB3 are imported as inactive users - All imported, active users are automatically approved - Users that were deactivated in phpBB3 get suspended for 200 years during the import - Anonymous user can be imported as suspended users instead of the system user - Forums of type "link" are not imported as categories anymore - Internal links to posts get rewritten during the import (previously only links to topics got rewritten) - Ordered lists with BBCode [list=a] (which are unsupported in Discourse) get imported as if they would be [list=1] - Importing of avatars, attachments, private messages, polls and bookmarks can be disabled via configuration file - Optional fixing of private messages for forums that have been upgraded from phpBB2 prevents the 
import of duplicate messages and tries to group related messages into topics - Table prefix (default: phpbb) is configurable - Most of phpBB's default smilies are mapped to Emojis and all other smilies get uploaded and embedded as images. Smiley mappings can be added or overridden in the settings.yml file. --- script/import_scripts/phpbb3.rb | 511 +----------------- .../phpbb3/database/database.rb | 56 ++ .../phpbb3/database/database_3_0.rb | 333 ++++++++++++ .../phpbb3/database/database_3_1.rb | 26 + .../phpbb3/database/database_base.rb | 24 + script/import_scripts/phpbb3/importer.rb | 152 ++++++ .../phpbb3/importers/attachment_importer.rb | 36 ++ .../phpbb3/importers/avatar_importer.rb | 107 ++++ .../phpbb3/importers/bookmark_importer.rb | 10 + .../phpbb3/importers/category_importer.rb | 47 ++ .../phpbb3/importers/importer_factory.rb | 69 +++ .../phpbb3/importers/message_importer.rb | 83 +++ .../phpbb3/importers/poll_importer.rb | 155 ++++++ .../phpbb3/importers/post_importer.rb | 79 +++ .../phpbb3/importers/user_importer.rb | 97 ++++ script/import_scripts/phpbb3/settings.yml | 59 ++ .../phpbb3/support/constants.rb | 35 ++ .../import_scripts/phpbb3/support/settings.rb | 78 +++ .../phpbb3/support/smiley_processor.rb | 90 +++ .../phpbb3/support/text_processor.rb | 133 +++++ 20 files changed, 1696 insertions(+), 484 deletions(-) create mode 100644 script/import_scripts/phpbb3/database/database.rb create mode 100644 script/import_scripts/phpbb3/database/database_3_0.rb create mode 100644 script/import_scripts/phpbb3/database/database_3_1.rb create mode 100644 script/import_scripts/phpbb3/database/database_base.rb create mode 100644 script/import_scripts/phpbb3/importer.rb create mode 100644 script/import_scripts/phpbb3/importers/attachment_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/avatar_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/bookmark_importer.rb create mode 100644 
script/import_scripts/phpbb3/importers/category_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/importer_factory.rb create mode 100644 script/import_scripts/phpbb3/importers/message_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/poll_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/post_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/user_importer.rb create mode 100644 script/import_scripts/phpbb3/settings.yml create mode 100644 script/import_scripts/phpbb3/support/constants.rb create mode 100644 script/import_scripts/phpbb3/support/settings.rb create mode 100644 script/import_scripts/phpbb3/support/smiley_processor.rb create mode 100644 script/import_scripts/phpbb3/support/text_processor.rb diff --git a/script/import_scripts/phpbb3.rb b/script/import_scripts/phpbb3.rb index 373db6e755b..639b51d7852 100644 --- a/script/import_scripts/phpbb3.rb +++ b/script/import_scripts/phpbb3.rb @@ -1,486 +1,29 @@ -require "mysql2" -require File.expand_path(File.dirname(__FILE__) + "/base.rb") - -class ImportScripts::PhpBB3 < ImportScripts::Base - - PHPBB_DB = "phpbb" - BATCH_SIZE = 1000 - - ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s):// - NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https:// - - # Set PHPBB_BASE_DIR to the base directory of your phpBB installation. - # When importing, you should place the subdirectories "files" (containing all - # attachments) and "images" (containing avatars) in PHPBB_BASE_DIR. - # If nil, [attachment] tags and avatars won't be processed. - # Edit AUTHORIZED_EXTENSIONS as needed. - # If you used ATTACHMENTS_BASE_DIR before, e.g. ATTACHMENTS_BASE_DIR = '/var/www/phpbb/files/' - # would become PHPBB_BASE_DIR = '/var/www/phpbb' - # now. 
- PHPBB_BASE_DIR = '/var/www/phpbb' - AUTHORIZED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'zip', 'rar', 'pdf'] - - # Avatar types to import.: - # 1 = uploaded avatars (you should probably leave this here) - # 2 = hotlinked avatars - WARNING: this will considerably slow down your import - # if there are many hotlinked avatars and some of them unavailable! - # 3 = galery avatars (the predefined avatars phpBB offers. They will be converted to uploaded avatars) - IMPORT_AVATARS = [1, 3] - - def initialize - super - - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - #password: "password", - database: PHPBB_DB - ) - phpbb_read_config - end - - def execute - import_users - import_categories - import_posts - import_private_messages - import_attachments unless PHPBB_BASE_DIR.nil? - suspend_users - end - - def import_users - puts '', "creating users" - - total_count = mysql_query("SELECT count(*) count - FROM phpbb_users u - JOIN phpbb_groups g ON g.group_id = u.group_id - WHERE g.group_name != 'BOTS' - AND u.user_type != 1;").first['count'] - - batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT user_id id, user_email email, username, user_regdate, group_name, user_avatar_type, user_avatar - FROM phpbb_users u - JOIN phpbb_groups g ON g.group_id = u.group_id - WHERE g.group_name != 'BOTS' - AND u.user_type != 1 - ORDER BY u.user_id ASC - LIMIT #{BATCH_SIZE} - OFFSET #{offset};") - - break if results.size < 1 - - create_users(results, total: total_count, offset: offset) do |user| - { id: user['id'], - email: user['email'], - username: user['username'], - created_at: Time.zone.at(user['user_regdate']), - moderator: user['group_name'] == 'GLOBAL_MODERATORS', - admin: user['group_name'] == 'ADMINISTRATORS', - post_create_action: proc do |newmember| - if not PHPBB_BASE_DIR.nil? and IMPORT_AVATARS.include?(user['user_avatar_type']) and newmember.uploaded_avatar_id.blank? 
- path = phpbb_avatar_fullpath(user['user_avatar_type'], user['user_avatar']) - if path - begin - upload = create_upload(newmember.id, path, user['user_avatar']) - if upload.persisted? - newmember.import_mode = false - newmember.create_user_avatar - newmember.import_mode = true - newmember.user_avatar.update(custom_upload_id: upload.id) - newmember.update(uploaded_avatar_id: upload.id) - else - puts "Error: Upload did not persist!" - end - rescue SystemCallError => err - puts "Could not import avatar: #{err.message}" - end - end - end - end - } - end - end - end - - def import_categories - results = mysql_query(" - SELECT forum_id id, parent_id, left(forum_name, 50) name, forum_desc description - FROM phpbb_forums - ORDER BY parent_id ASC, forum_id ASC - ") - - create_categories(results) do |row| - h = {id: row['id'], name: CGI.unescapeHTML(row['name']), description: CGI.unescapeHTML(row['description'])} - if row['parent_id'].to_i > 0 - h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id']) - end - h - end - end - - def import_posts - puts "", "creating topics and posts" - - total_count = mysql_query("SELECT count(*) count from phpbb_posts").first["count"] - - batches(BATCH_SIZE) do |offset| - results = mysql_query(" - SELECT p.post_id id, - p.topic_id topic_id, - t.forum_id category_id, - t.topic_title title, - t.topic_first_post_id first_post_id, - p.poster_id user_id, - p.post_text raw, - p.post_time post_time - FROM phpbb_posts p, - phpbb_topics t - WHERE p.topic_id = t.topic_id - ORDER BY id - LIMIT #{BATCH_SIZE} - OFFSET #{offset}; - ") - - break if results.size < 1 - - create_posts(results, total: total_count, offset: offset) do |m| - skip = false - mapped = {} - - mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_phpbb_post(m['raw'], m['id']) - mapped[:created_at] = Time.zone.at(m['post_time']) - - if m['id'] == m['first_post_id'] - mapped[:category] = 
category_id_from_imported_category_id(m['category_id']) - mapped[:title] = CGI.unescapeHTML(m['title']) - else - parent = topic_lookup_from_imported_post_id(m['first_post_id']) - if parent - mapped[:topic_id] = parent[:topic_id] - else - puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" - skip = true - end - end - - skip ? nil : mapped - end - end - end - - def import_private_messages - puts "", "creating private messages" - - total_count = mysql_query("SELECT count(*) count from phpbb_privmsgs").first["count"] - - batches(BATCH_SIZE) do |offset| - results = mysql_query(" - SELECT msg_id id, - root_level, - author_id user_id, - message_time, - message_subject, - message_text - FROM phpbb_privmsgs - ORDER BY root_level ASC, msg_id ASC - LIMIT #{BATCH_SIZE} - OFFSET #{offset}; - ") - - break if results.size < 1 - - create_posts(results, total: total_count, offset: offset) do |m| - skip = false - mapped = {} - - mapped[:id] = "pm:#{m['id']}" - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_phpbb_post(m['message_text'], m['id']) - mapped[:created_at] = Time.zone.at(m['message_time']) - - if m['root_level'] == 0 - mapped[:title] = CGI.unescapeHTML(m['message_subject']) - mapped[:archetype] = Archetype.private_message - - # Find the users who are part of this private message. - # Found from the to_address of phpbb_privmsgs, by looking at - # all the rows with the same root_level. - # to_address looks like this: "u_91:u_1234:u_200" - # The "u_" prefix is discarded and the rest is a user_id. - - import_user_ids = mysql_query(" - SELECT to_address - FROM phpbb_privmsgs - WHERE msg_id = #{m['id']} - OR root_level = #{m['id']}").map { |r| r['to_address'].split(':') }.flatten!.map! { |u| u[2..-1] } - - mapped[:target_usernames] = import_user_ids.map! do |import_user_id| - import_user_id.to_s == m['user_id'].to_s ? 
nil : User.find_by_id(user_id_from_imported_user_id(import_user_id)).try(:username) - end.compact.uniq - - skip = true if mapped[:target_usernames].empty? # pm with yourself? - else - parent = topic_lookup_from_imported_post_id("pm:#{m['root_level']}") - if parent - mapped[:topic_id] = parent[:topic_id] - else - puts "Parent post pm:#{m['root_level']} doesn't exist. Skipping #{m["id"]}: #{m["message_subject"][0..40]}" - skip = true - end - end - - skip ? nil : mapped - end - end - end - - def suspend_users - puts '', "updating banned users" - - where = "ban_userid > 0 AND (ban_end = 0 OR ban_end > #{Time.zone.now.to_i})" - - banned = 0 - failed = 0 - total = mysql_query("SELECT count(*) count FROM phpbb_banlist WHERE #{where}").first['count'] - - system_user = Discourse.system_user - - mysql_query("SELECT ban_userid, ban_start, ban_end, ban_give_reason FROM phpbb_banlist WHERE #{where}").each do |b| - user = find_user_by_import_id(b['ban_userid']) - if user - user.suspended_at = Time.zone.at(b['ban_start']) - user.suspended_till = b['ban_end'] > 0 ? Time.zone.at(b['ban_end']) : 200.years.from_now - - if user.save - StaffActionLogger.new(system_user).log_user_suspend(user, b['ban_give_reason']) - banned += 1 - else - puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}" - failed += 1 - end - else - puts "Not found: #{b['ban_userid']}" - failed += 1 - end - - print_status banned + failed, total - end - end - - def process_phpbb_post(raw, import_id) - s = raw.dup - - # :) is encoded as :) - s.gsub!(/]+) \/>/, '\1') - - # Internal forum links of this form: viewtopic.php?f=26&t=3412 - s.gsub!(/viewtopic(?:.*)t=(\d+)<\/a>/) do |phpbb_link| - replace_internal_link(phpbb_link, $1, import_id) - end - - # Some links look like this: http://www.onegameamonth.com - s.gsub!(/(.+)<\/a>/, '[\2](\1)') - - # Many phpbb bbcode tags have a hash attached to them. 
Examples: - # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] - # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] - s.gsub!(/:(?:\w{8})\]/, ']') - - s = CGI.unescapeHTML(s) - - # phpBB shortens link text like this, which breaks our markdown processing: - # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) - # - # Work around it for now: - s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[') - - # Replace internal forum links that aren't in the format - s.gsub!(internal_url_regexp) do |phpbb_link| - replace_internal_link(phpbb_link, $1, import_id) - end - # convert list tags to ul and list=1 tags to ol - # (basically, we're only missing list=a here...) - s.gsub!(/\[list\](.*?)\[\/list:u\]/m, '[ul]\1[/ul]') - s.gsub!(/\[list=1\](.*?)\[\/list:o\]/m, '[ol]\1[/ol]') - # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - s.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]') - - s - end - - def replace_internal_link(phpbb_link, import_topic_id, from_import_post_id) - results = mysql_query("select topic_first_post_id from phpbb_topics where topic_id = #{import_topic_id}") - - return phpbb_link unless results.size > 0 - - linked_topic_id = results.first['topic_first_post_id'] - lookup = topic_lookup_from_imported_post_id(linked_topic_id) - - return phpbb_link unless lookup - - t = Topic.find_by_id(lookup[:topic_id]) - if t - "#{NEW_SITE_PREFIX}/t/#{t.slug}/#{t.id}" - else - phpbb_link - end - end - - def internal_url_regexp - @internal_url_regexp ||= Regexp.new("http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}/viewtopic\\.php?(?:\\S*)t=(\\d+)") - end - - # This step is done separately because it can take multiple attempts to get right (because of - # missing files, wrong paths, authorized extensions, etc.). 
- def import_attachments - setting = AUTHORIZED_EXTENSIONS.join('|') - SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions - - r = /\[attachment=[\d]+\]<\!-- [\w]+ --\>([^<]+)<\!-- [\w]+ --\>\[\/attachment\]/ - - user = Discourse.system_user - - current_count = 0 - total_count = Post.count - success_count = 0 - fail_count = 0 - - puts '', "Importing attachments...", '' - - Post.find_each do |post| - current_count += 1 - print_status current_count, total_count - - new_raw = post.raw.dup - new_raw.gsub!(r) do |s| - matches = r.match(s) - real_filename = matches[1] - - # note: currently, we do not import PM attachments. - # If this should be desired, this has to be fixed, - # otherwise, the SQL state coughs up an error for the - # clause "WHERE post_msg_id = pm12345"... - next s if post.custom_fields['import_id'].start_with?('pm:') - - sql = "SELECT physical_filename, - mimetype - FROM phpbb_attachments - WHERE post_msg_id = #{post.custom_fields['import_id']} - AND real_filename = '#{real_filename}';" - - begin - results = mysql_query(sql) - rescue Mysql2::Error => e - puts "SQL Error" - puts e.message - puts sql - fail_count += 1 - next s - end - - row = results.first - if !row - puts "Couldn't find phpbb_attachments record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}, real_filename = #{real_filename}" - fail_count += 1 - next s - end - - filename = File.join(PHPBB_BASE_DIR+'/files', row['physical_filename']) - if !File.exists?(filename) - puts "Attachment file doesn't exist: #{filename}" - fail_count += 1 - next s - end - - upload = create_upload(user.id, filename, real_filename) - - if upload.nil? || !upload.valid? 
- puts "Upload not valid :(" - puts upload.errors.inspect if upload - fail_count += 1 - next s - end - - success_count += 1 - - html_for_upload(upload, real_filename) - end - - if new_raw != post.raw - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: 'Migrate from PHPBB3' }) - end - end - - puts '', '' - puts "succeeded: #{success_count}" - puts " failed: #{fail_count}" if fail_count > 0 - puts '' - end - - # Read avatar config from phpBB configuration table. - # Stored there: - paths relative to the phpBB install path - # - "salt", i.e. base filename for uploaded avatars - # - def phpbb_read_config - results = mysql_query("SELECT config_name, config_value - FROM phpbb_config;") - if results.size<1 - puts "could not read config... no avatars and attachments will be imported!" - return - end - results.each do |result| - if result['config_name']=='avatar_gallery_path' - @avatar_gallery_path = result['config_value'] - elsif result['config_name']=='avatar_path' - @avatar_path = result['config_value'] - elsif result['config_name']=='avatar_salt' - @avatar_salt = result['config_value'] - end - end - end - - # Create the full path to the phpBB avatar specified by avatar_type and filename. - # - def phpbb_avatar_fullpath(avatar_type, filename) - case avatar_type - when 1 # uploaded avatar - filename.gsub!(/_[0-9]+\./,'.') # we need 1337.jpg, not 1337_2983745.jpg - path=@avatar_path - PHPBB_BASE_DIR+'/'+path+'/'+@avatar_salt+'_'+filename - when 3 # gallery avatar - path=@avatar_gallery_path - PHPBB_BASE_DIR+'/'+path+'/'+filename - when 2 # hotlinked avatar - begin - hotlinked = FileHelper.download(filename, SiteSetting.max_image_size_kb.kilobytes, "discourse-hotlinked") - rescue StandardError => err - puts "Error downloading avatar: #{err.message}. Skipping..." 
- return nil - end - if hotlinked - if hotlinked.size <= SiteSetting.max_image_size_kb.kilobytes - return hotlinked - else - Rails.logger.error("Failed to pull hotlinked image: #{filename} - Image is bigger than #{@max_size}") - nil - end - else - Rails.logger.error("There was an error while downloading '#{filename}' locally.") - nil - end - else - puts 'Invalid avatar type #{avatar_type}, skipping' - nil - end - end - - - def mysql_query(sql) - @client.query(sql, cache_rows: false) - end +if ARGV.length != 1 || !File.exists?(ARGV[0]) + STDERR.puts '', 'Usage of phpBB3 importer:', 'bundle exec ruby phpbb3.rb ' + STDERR.puts '', "Use the settings file from #{File.expand_path('phpbb3/settings.yml', File.dirname(__FILE__))} as an example." + exit 1 end -ImportScripts::PhpBB3.new.perform +module ImportScripts + module PhpBB3 + require_relative 'phpbb3/support/settings' + require_relative 'phpbb3/database/database' + + @settings = Settings.load(ARGV[0]) + + # We need to load the gem files for ruby-bbcode-to-md and the database adapter + # (e.g. mysql2) before bundler gets initialized by the base importer. + # Otherwise we get an error since those gems are not always in the Gemfile. 
+ require 'ruby-bbcode-to-md' if @settings.use_bbcode_to_md + + begin + @database = Database.create(@settings.database) + rescue UnsupportedVersionError => error + STDERR.puts '', error.message + exit 1 + end + + require_relative 'phpbb3/importer' + Importer.new(@settings, @database).perform + end +end diff --git a/script/import_scripts/phpbb3/database/database.rb b/script/import_scripts/phpbb3/database/database.rb new file mode 100644 index 00000000000..731f05b8a12 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database.rb @@ -0,0 +1,56 @@ +require 'mysql2' + +module ImportScripts::PhpBB3 + class Database + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def self.create(database_settings) + Database.new(database_settings).create_database + end + + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def initialize(database_settings) + @database_settings = database_settings + @database_client = create_database_client + end + + # @return [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + def create_database + version = get_phpbb_version + + if version.start_with?('3.0') + require_relative 'database_3_0' + Database_3_0.new(@database_client, @database_settings) + elsif version.start_with?('3.1') + require_relative 'database_3_1' + Database_3_1.new(@database_client, @database_settings) + else + raise UnsupportedVersionError, "Unsupported version (#{version}) of phpBB detected.\n" \ + << 'Currently only 3.0.x and 3.1.x are supported by this importer.' 
+ end + end + + protected + + def create_database_client + Mysql2::Client.new( + host: @database_settings.host, + username: @database_settings.username, + password: @database_settings.password, + database: @database_settings.schema + ) + end + + def get_phpbb_version + table_prefix = @database_settings.table_prefix + + @database_client.query(<<-SQL, cache_rows: false, symbolize_keys: true).first[:config_value] + SELECT config_value + FROM #{table_prefix}_config + WHERE config_name = 'version' + SQL + end + end + + class UnsupportedVersionError < RuntimeError; + end +end diff --git a/script/import_scripts/phpbb3/database/database_3_0.rb b/script/import_scripts/phpbb3/database/database_3_0.rb new file mode 100644 index 00000000000..d4115cc38e1 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database_3_0.rb @@ -0,0 +1,333 @@ +require_relative 'database_base' +require_relative '../support/constants' + +module ImportScripts::PhpBB3 + class Database_3_0 < DatabaseBase + def count_users + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_users u + JOIN #{@table_prefix}_groups g ON g.group_id = u.group_id + WHERE u.user_type != #{Constants::USER_TYPE_IGNORE} + SQL + end + + def fetch_users(offset) + query(<<-SQL) + SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip, + u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason, + u.user_posts, u.user_website, u.user_from, u.user_birthday, u.user_avatar_type, u.user_avatar + FROM #{@table_prefix}_users u + JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id) + LEFT OUTER JOIN #{@table_prefix}_banlist b ON ( + u.user_id = b.ban_userid AND b.ban_exclude = 0 AND + (b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP()) + ) + WHERE u.user_type != #{Constants::USER_TYPE_IGNORE} + ORDER BY u.user_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def count_anonymous_users + count(<<-SQL) + SELECT COUNT(DISTINCT post_username) 
AS count + FROM #{@table_prefix}_posts + WHERE post_username <> '' + SQL + end + + def fetch_anonymous_users(offset) + query(<<-SQL) + SELECT post_username, MIN(post_time) AS first_post_time + FROM #{@table_prefix}_posts + WHERE post_username <> '' + GROUP BY post_username + ORDER BY post_username ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def fetch_categories + query(<<-SQL) + SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_name, f.forum_desc, x.first_post_time + FROM phpbb_forums f + LEFT OUTER JOIN ( + SELECT MIN(topic_time) AS first_post_time, forum_id + FROM phpbb_topics + GROUP BY forum_id + ) x ON (f.forum_id = x.forum_id) + WHERE f.forum_type != #{Constants::FORUM_TYPE_LINK} + ORDER BY f.parent_id ASC, f.left_id ASC + SQL + end + + def count_posts + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_posts + SQL + end + + def fetch_posts(offset) + query(<<-SQL) + SELECT p.post_id, p.topic_id, t.forum_id, t.topic_title, t.topic_first_post_id, p.poster_id, + p.post_text, p.post_time, p.post_username, t.topic_status, t.topic_type, t.poll_title, + CASE WHEN t.poll_length > 0 THEN t.poll_start + t.poll_length ELSE NULL END AS poll_end, + t.poll_max_options, p.post_attachment + FROM #{@table_prefix}_posts p + JOIN #{@table_prefix}_topics t ON (p.topic_id = t.topic_id) + ORDER BY p.post_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def get_first_post_id(topic_id) + query(<<-SQL).first[:topic_first_post_id] + SELECT topic_first_post_id + FROM #{@table_prefix}_topics + WHERE topic_id = #{topic_id} + SQL + end + + def fetch_poll_options(topic_id) + query(<<-SQL) + SELECT poll_option_id, poll_option_text, poll_option_total + FROM #{@table_prefix}_poll_options + WHERE topic_id = #{topic_id} + ORDER BY poll_option_id + SQL + end + + def fetch_poll_votes(topic_id) + # this query ignores votes from users that do not exist anymore + query(<<-SQL) + SELECT u.user_id, v.poll_option_id + FROM #{@table_prefix}_poll_votes 
v + JOIN #{@table_prefix}_users u ON (v.vote_user_id = u.user_id) + WHERE v.topic_id = #{topic_id} + SQL + end + + def count_voters(topic_id) + # anonymous voters can't be counted, but lets try to make the count look "correct" anyway + count(<<-SQL) + SELECT MAX(count) AS count + FROM ( + SELECT COUNT(DISTINCT vote_user_id) AS count + FROM #{@table_prefix}_poll_votes + WHERE topic_id = #{topic_id} + UNION + SELECT MAX(poll_option_total) AS count + FROM #{@table_prefix}_poll_options + WHERE topic_id = #{topic_id} + ) x + SQL + end + + def get_max_attachment_size + query(<<-SQL).first[:filesize] + SELECT IFNULL(MAX(filesize), 0) AS filesize + FROM #{@table_prefix}_attachments + SQL + end + + def fetch_attachments(topic_id, post_id) + query(<<-SQL) + SELECT physical_filename, real_filename + FROM #{@table_prefix}_attachments + WHERE topic_id = #{topic_id} AND post_msg_id = #{post_id} + ORDER BY filetime DESC, post_msg_id ASC + SQL + end + + def count_messages(use_fixed_messages) + if use_fixed_messages + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_import_privmsgs + SQL + else + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_privmsgs + SQL + end + end + + def fetch_messages(use_fixed_messages, offset) + if use_fixed_messages + query(<<-SQL) + SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text, + IFNULL(a.attachment_count, 0) AS attachment_count + FROM #{@table_prefix}_privmsgs m + JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) + LEFT OUTER JOIN ( + SELECT post_msg_id, COUNT(*) AS attachment_count + FROM #{@table_prefix}_attachments + WHERE topic_id = 0 + GROUP BY post_msg_id + ) a ON (m.msg_id = a.post_msg_id) + ORDER BY i.root_msg_id ASC, m.msg_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + else + query(<<-SQL) + SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject, + m.message_text, IFNULL(a.attachment_count, 0) AS 
attachment_count + FROM #{@table_prefix}_privmsgs m + LEFT OUTER JOIN ( + SELECT post_msg_id, COUNT(*) AS attachment_count + FROM #{@table_prefix}_attachments + WHERE topic_id = 0 + GROUP BY post_msg_id + ) a ON (m.msg_id = a.post_msg_id) + ORDER BY m.root_level ASC, m.msg_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + end + + def fetch_message_participants(msg_id, use_fixed_messages) + if use_fixed_messages + query(<<-SQL) + SELECT m.to_address + FROM #{@table_prefix}_privmsgs m + JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) + WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id} + SQL + else + query(<<-SQL) + SELECT m.to_address + FROM #{@table_prefix}_privmsgs m + WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id} + SQL + end + end + + def calculate_fixed_messages + drop_temp_import_message_table + create_temp_import_message_table + fill_temp_import_message_table + + drop_import_message_table + create_import_message_table + fill_import_message_table + + drop_temp_import_message_table + end + + def count_bookmarks + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_bookmarks + SQL + end + + def fetch_bookmarks(offset) + query(<<-SQL) + SELECT b.user_id, t.topic_first_post_id + FROM #{@table_prefix}_bookmarks b + JOIN #{@table_prefix}_topics t ON (b.topic_id = t.topic_id) + ORDER BY b.user_id ASC, b.topic_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def get_config_values + query(<<-SQL).first + SELECT + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'version') AS phpbb_version, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_gallery_path') AS avatar_gallery_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_path') AS avatar_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_salt') AS avatar_salt, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name 
= 'smilies_path') AS smilies_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path + SQL + end + + protected + + def drop_temp_import_message_table + query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs_temp") + end + + def create_temp_import_message_table + query(<<-SQL) + CREATE TABLE #{@table_prefix}_import_privmsgs_temp ( + msg_id MEDIUMINT(8) NOT NULL, + root_msg_id MEDIUMINT(8) NOT NULL, + recipient_id MEDIUMINT(8), + normalized_subject VARCHAR(255) NOT NULL, + PRIMARY KEY (msg_id) + ) + SQL + end + + # this removes duplicate messages, converts the to_address to a number + # and stores the message_subject in lowercase and without the prefix "Re: " + def fill_temp_import_message_table + query(<<-SQL) + INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject) + SELECT m.msg_id, m.root_level, + CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN + CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER) + ELSE NULL END AS recipient_id, + LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN + SUBSTRING(m.message_subject, 5) + ELSE m.message_subject END) AS normalized_subject + FROM #{@table_prefix}_privmsgs m + WHERE NOT EXISTS ( + SELECT 1 + FROM #{@table_prefix}_privmsgs x + WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id + AND x.to_address = m.to_address AND x.message_time = m.message_time + ) + SQL + end + + def drop_import_message_table + query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs") + end + + def create_import_message_table + query(<<-SQL) + CREATE TABLE #{@table_prefix}_import_privmsgs ( + msg_id MEDIUMINT(8) NOT NULL, + root_msg_id MEDIUMINT(8) NOT NULL, + PRIMARY KEY (msg_id), + INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id) + ) + SQL + end + + # this tries to calculate the actual root_level (= msg_id of the first message in a + # private conversation) based on 
subject, time, author and recipient + def fill_import_message_table + query(<<-SQL) + INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id) + SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN + COALESCE(( + SELECT a.msg_id + FROM #{@table_prefix}_privmsgs a + JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id) + WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR + (a.author_id = i.recipient_id AND b.recipient_id = m.author_id)) + AND b.normalized_subject = i.normalized_subject + AND a.msg_id <> m.msg_id + AND a.message_time < m.message_time + ORDER BY a.message_time ASC + LIMIT 1 + ), 0) ELSE i.root_msg_id END AS root_msg_id + FROM #{@table_prefix}_privmsgs m + JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id) + SQL + end + end +end diff --git a/script/import_scripts/phpbb3/database/database_3_1.rb b/script/import_scripts/phpbb3/database/database_3_1.rb new file mode 100644 index 00000000000..bf13546e2d0 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database_3_1.rb @@ -0,0 +1,26 @@ +require_relative 'database_3_0' +require_relative '../support/constants/constants' + +module ImportScripts::PhpBB3 + class Database_3_1 < Database_3_0 + def fetch_users(offset) + query(<<-SQL) + SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip, + u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason, + u.user_posts, f.pf_phpbb_website AS user_website, f.pf_phpbb_location AS user_from, + u.user_birthday, u.user_avatar_type, u.user_avatar + FROM #{@table_prefix}_users u + JOIN #{@table_prefix}_profile_fields_data f ON (u.user_id = f.user_id) + JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id) + LEFT OUTER JOIN #{@table_prefix}_banlist b ON ( + u.user_id = b.ban_userid AND b.ban_exclude = 0 AND + (b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP()) + ) + WHERE u.user_type != #{Constants::USER_TYPE_IGNORE} + ORDER BY u.user_id 
ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + end +end diff --git a/script/import_scripts/phpbb3/database/database_base.rb b/script/import_scripts/phpbb3/database/database_base.rb new file mode 100644 index 00000000000..3c8b4b37181 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database_base.rb @@ -0,0 +1,24 @@ +module ImportScripts::PhpBB3 + class DatabaseBase + # @param database_client [Mysql2::Client] + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def initialize(database_client, database_settings) + @database_client = database_client + + @batch_size = database_settings.batch_size + @table_prefix = database_settings.table_prefix + end + + protected + + # Executes a database query. + def query(sql) + @database_client.query(sql, cache_rows: false, symbolize_keys: true) + end + + # Executes a database query and returns the value of the 'count' column. + def count(sql) + query(sql).first[:count] + end + end +end diff --git a/script/import_scripts/phpbb3/importer.rb b/script/import_scripts/phpbb3/importer.rb new file mode 100644 index 00000000000..28c2ed6fa63 --- /dev/null +++ b/script/import_scripts/phpbb3/importer.rb @@ -0,0 +1,152 @@ +require_relative '../base' +require_relative 'support/settings' +require_relative 'database/database' +require_relative 'importers/importer_factory' + +module ImportScripts::PhpBB3 + class Importer < ImportScripts::Base + # @param settings [ImportScripts::PhpBB3::Settings] + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + def initialize(settings, database) + @settings = settings + super() + + @database = database + @php_config = database.get_config_values + @importers = ImporterFactory.new(@database, @lookup, @uploader, @settings, @php_config) + end + + def perform + super if settings_check_successful? 
+ end + + protected + + def execute + puts '', "importing from phpBB #{@php_config[:phpbb_version]}" + + import_users + import_anonymous_users if @settings.import_anonymous_users + import_categories + import_posts + import_private_messages if @settings.import_private_messages + import_bookmarks if @settings.import_bookmarks + end + + def get_site_settings_for_import + settings = super + + max_file_size_kb = @database.get_max_attachment_size + settings[:max_image_size_kb] = [max_file_size_kb, SiteSetting.max_image_size_kb].max + settings[:max_attachment_size_kb] = [max_file_size_kb, SiteSetting.max_attachment_size_kb].max + + settings + end + + def settings_check_successful? + true + end + + def import_users + puts '', 'creating users' + total_count = @database.count_users + importer = @importers.user_importer + + batches do |offset| + rows = @database.fetch_users(offset) + break if rows.size < 1 + + create_users(rows, total: total_count, offset: offset) do |row| + importer.map_user(row) + end + end + end + + def import_anonymous_users + puts '', 'creating anonymous users' + total_count = @database.count_anonymous_users + importer = @importers.user_importer + + batches do |offset| + rows = @database.fetch_anonymous_users(offset) + break if rows.size < 1 + + create_users(rows, total: total_count, offset: offset) do |row| + importer.map_anonymous_user(row) + end + end + end + + def import_categories + puts '', 'creating categories' + rows = @database.fetch_categories + importer = @importers.category_importer + + create_categories(rows) do |row| + importer.map_category(row) + end + end + + def import_posts + puts '', 'creating topics and posts' + total_count = @database.count_posts + importer = @importers.post_importer + + batches do |offset| + rows = @database.fetch_posts(offset) + break if rows.size < 1 + + create_posts(rows, total: total_count, offset: offset) do |row| + importer.map_post(row) + end + end + end + + def import_private_messages + if 
@settings.fix_private_messages + puts '', 'fixing private messages' + @database.calculate_fixed_messages + end + + puts '', 'creating private messages' + total_count = @database.count_messages(@settings.fix_private_messages) + importer = @importers.message_importer + + batches do |offset| + rows = @database.fetch_messages(@settings.fix_private_messages, offset) + break if rows.size < 1 + + create_posts(rows, total: total_count, offset: offset) do |row| + importer.map_message(row) + end + end + end + + def import_bookmarks + puts '', 'creating bookmarks' + total_count = @database.count_bookmarks + importer = @importers.bookmark_importer + + batches do |offset| + rows = @database.fetch_bookmarks(offset) + break if rows.size < 1 + + create_bookmarks(rows, total: total_count, offset: offset) do |row| + importer.map_bookmark(row) + end + end + end + + def update_last_seen_at + # no need for this since the importer sets last_seen_at for each user during the import + end + + def use_bbcode_to_md? 
+ @settings.use_bbcode_to_md + end + + def batches + super(@settings.database.batch_size) + end + end +end diff --git a/script/import_scripts/phpbb3/importers/attachment_importer.rb b/script/import_scripts/phpbb3/importers/attachment_importer.rb new file mode 100644 index 00000000000..e41ca7a1209 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/attachment_importer.rb @@ -0,0 +1,36 @@ +module ImportScripts::PhpBB3 + class AttachmentImporter + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(database, uploader, settings, phpbb_config) + @database = database + @uploader = uploader + + @attachment_path = File.join(settings.base_dir, phpbb_config[:attachment_path]) + end + + def import_attachments(user_id, post_id, topic_id = 0) + rows = @database.fetch_attachments(topic_id, post_id) + return nil if rows.size < 1 + + attachments = [] + + rows.each do |row| + path = File.join(@attachment_path, row[:physical_filename]) + filename = CGI.unescapeHTML(row[:real_filename]) + upload = @uploader.create_upload(user_id, path, filename) + + if upload.nil? || !upload.valid? 
+ puts "Failed to upload #{path}" + puts upload.errors.inspect if upload + else + attachments << @uploader.html_for_upload(upload, filename) + end + end + + attachments + end + end +end diff --git a/script/import_scripts/phpbb3/importers/avatar_importer.rb b/script/import_scripts/phpbb3/importers/avatar_importer.rb new file mode 100644 index 00000000000..3db8b701004 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/avatar_importer.rb @@ -0,0 +1,107 @@ +module ImportScripts::PhpBB3 + class AvatarImporter + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(uploader, settings, phpbb_config) + @uploader = uploader + @settings = settings + + @uploaded_avatar_path = File.join(settings.base_dir, phpbb_config[:avatar_path]) + @gallery_path = File.join(settings.base_dir, phpbb_config[:avatar_gallery_path]) + @avatar_salt = phpbb_config[:avatar_salt] + end + + def import_avatar(user, row) + avatar_type = row[:user_avatar_type] + return unless is_avatar_importable?(user, avatar_type) + + filename = row[:user_avatar] + path = get_avatar_path(avatar_type, filename) + return if path.nil? + + begin + filename = "avatar#{File.extname(path)}" + upload = @uploader.create_upload(user.id, path, filename) + + if upload.persisted? + user.import_mode = false + user.create_user_avatar + user.import_mode = true + user.user_avatar.update(custom_upload_id: upload.id) + user.update(uploaded_avatar_id: upload.id) + else + Rails.logger.error("Could not persist avatar for user #{user.username}") + end + rescue SystemCallError => err + Rails.logger.error("Could not import avatar for user #{user.username}: #{err.message}") + end + end + + protected + + def is_avatar_importable?(user, avatar_type) + is_allowed_avatar_type?(avatar_type) && user.uploaded_avatar_id.blank? 
+ end + + def get_avatar_path(avatar_type, filename) + case avatar_type + when Constants::AVATAR_TYPE_UPLOADED then + filename.gsub!(/_[0-9]+\./, '.') # we need 1337.jpg, not 1337_2983745.jpg + get_uploaded_path(filename) + when Constants::AVATAR_TYPE_GALLERY then + get_gallery_path(filename) + when Constants::AVATAR_TYPE_REMOTE then + download_avatar(filename) + else + Rails.logger.error("Invalid avatar type #{avatar_type}. Skipping...") + nil + end + end + + # Tries to download the remote avatar. + def download_avatar(url) + max_image_size_kb = SiteSetting.max_image_size_kb.kilobytes + + begin + avatar_file = FileHelper.download(url, max_image_size_kb, 'discourse-avatar') + rescue StandardError => err + warn "Error downloading avatar: #{err.message}. Skipping..." + return nil + end + + if avatar_file + if avatar_file.size <= max_image_size_kb + return avatar_file + else + Rails.logger.error("Failed to download remote avatar: #{url} - Image is larger than #{max_image_size_kb} KB") + return nil + end + end + + Rails.logger.error("There was an error while downloading '#{url}' locally.") + nil + end + + def get_uploaded_path(filename) + File.join(@uploaded_avatar_path, "#{@avatar_salt}_#{filename}") + end + + def get_gallery_path(filename) + File.join(@gallery_path, filename) + end + + def is_allowed_avatar_type?(avatar_type) + case avatar_type + when Constants::AVATAR_TYPE_UPLOADED then + @settings.import_uploaded_avatars + when Constants::AVATAR_TYPE_REMOTE then + @settings.import_remote_avatars + when Constants::AVATAR_TYPE_GALLERY then + @settings.import_gallery_avatars + else + false + end + end + end +end diff --git a/script/import_scripts/phpbb3/importers/bookmark_importer.rb b/script/import_scripts/phpbb3/importers/bookmark_importer.rb new file mode 100644 index 00000000000..febc8ab8697 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/bookmark_importer.rb @@ -0,0 +1,10 @@ +module ImportScripts::PhpBB3 + class BookmarkImporter + def map_bookmark(row) 
+ { + user_id: row[:user_id], + post_id: row[:topic_first_post_id] + } + end + end +end diff --git a/script/import_scripts/phpbb3/importers/category_importer.rb b/script/import_scripts/phpbb3/importers/category_importer.rb new file mode 100644 index 00000000000..65eeb4097e6 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/category_importer.rb @@ -0,0 +1,47 @@ +module ImportScripts::PhpBB3 + class CategoryImporter + # @param lookup [ImportScripts::LookupContainer] + # @param text_processor [ImportScripts::PhpBB3::TextProcessor] + def initialize(lookup, text_processor) + @lookup = lookup + @text_processor = text_processor + end + + def map_category(row) + { + id: row[:forum_id], + name: CGI.unescapeHTML(row[:forum_name]), + parent_category_id: @lookup.category_id_from_imported_category_id(row[:parent_id]), + post_create_action: proc do |category| + update_category_description(category, row) + end + } + end + + protected + + # @param category [Category] + def update_category_description(category, row) + return if row[:forum_desc].blank? && row[:first_post_time].blank? + + topic = category.topic + post = topic.first_post + + if row[:first_post_time].present? + created_at = Time.zone.at(row[:first_post_time]) + + topic.created_at = created_at + topic.save + + post.created_at = created_at + post.save + end + + if row[:forum_desc].present? 
+ changes = {raw: @text_processor.process_raw_text(row[:forum_desc])} + opts = {revised_at: post.created_at, bypass_bump: true} + post.revise(Discourse.system_user, changes, opts) + end + end + end +end diff --git a/script/import_scripts/phpbb3/importers/importer_factory.rb b/script/import_scripts/phpbb3/importers/importer_factory.rb new file mode 100644 index 00000000000..4b793a153ae --- /dev/null +++ b/script/import_scripts/phpbb3/importers/importer_factory.rb @@ -0,0 +1,69 @@ +require_relative 'attachment_importer' +require_relative 'avatar_importer' +require_relative 'bookmark_importer' +require_relative 'category_importer' +require_relative 'message_importer' +require_relative 'poll_importer' +require_relative 'post_importer' +require_relative 'user_importer' +require_relative '../support/smiley_processor' +require_relative '../support/text_processor' + +module ImportScripts::PhpBB3 + class ImporterFactory + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + # @param lookup [ImportScripts::LookupContainer] + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(database, lookup, uploader, settings, phpbb_config) + @database = database + @lookup = lookup + @uploader = uploader + @settings = settings + @phpbb_config = phpbb_config + end + + def user_importer + UserImporter.new(avatar_importer, @settings) + end + + def category_importer + CategoryImporter.new(@lookup, text_processor) + end + + def post_importer + PostImporter.new(@lookup, text_processor, attachment_importer, poll_importer, @settings) + end + + def message_importer + MessageImporter.new(@database, @lookup, text_processor, attachment_importer, @settings) + end + + def bookmark_importer + BookmarkImporter.new + end + + protected + + def attachment_importer + AttachmentImporter.new(@database, @uploader, @settings, @phpbb_config) + end + + def avatar_importer + 
# Maps rows of phpBB's private message table to the post hashes consumed by the base importer.
class MessageImporter
  # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
  # @param lookup [ImportScripts::LookupContainer]
  # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
  # @param attachment_importer [ImportScripts::PhpBB3::AttachmentImporter]
  # @param settings [ImportScripts::PhpBB3::Settings]
  def initialize(database, lookup, text_processor, attachment_importer, settings)
    @database = database
    @lookup = lookup
    @text_processor = text_processor
    @attachment_importer = attachment_importer
    @settings = settings
  end

  # Builds the post hash for a single private message.
  #
  # Messages are imported with the id "pm:<msg_id>". A row whose :root_msg_id
  # is 0 is mapped as the first message of a new private topic, every other
  # row as a reply to the topic of its root message.
  #
  # @param row [Hash]
  # @return [Hash, nil] the mapped message, or nil when the message is skipped
  def map_message(row)
    user_id = @lookup.user_id_from_imported_user_id(row[:author_id]) || Discourse.system_user.id
    attachments = import_attachments(row, user_id)

    mapped = {
      id: "pm:#{row[:msg_id]}",
      user_id: user_id,
      created_at: Time.zone.at(row[:message_time]),
      raw: @text_processor.process_private_msg(row[:message_text], attachments)
    }

    if row[:root_msg_id] == 0
      map_first_message(row, mapped)
    else
      map_other_message(row, mapped)
    end
  end

  protected

  # Imports the attachments of the message when attachment import is enabled
  # and the row reports at least one attachment.
  def import_attachments(row, user_id)
    if @settings.import_attachments && row[:attachment_count] > 0
      @attachment_importer.import_attachments(user_id, row[:msg_id])
    end
  end

  # Completes the hash for a message that starts a private topic.
  # Returns nil (after printing a notice) when the message has no recipients.
  def map_first_message(row, mapped)
    mapped[:title] = CGI.unescapeHTML(row[:message_subject])
    mapped[:archetype] = Archetype.private_message
    mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id])

    if mapped[:target_usernames].empty? # pm with yourself?
      puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
      return nil
    end

    mapped
  end

  # Completes the hash for a reply.
  # Returns nil (after printing a notice) when the root message was not imported.
  def map_other_message(row, mapped)
    parent_msg_id = "pm:#{row[:root_msg_id]}"
    parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)

    if parent.blank?
      puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
      return nil
    end

    mapped[:topic_id] = parent[:topic_id]
    mapped
  end

  # Finds the usernames of the users who are part of this private message,
  # excluding the author and users that can't be found by their import id.
  #
  # @return [Array<String>] empty when there are no other participants
  def get_usernames(msg_id, author_id)
    # Found from the to_address of phpbb_privmsgs, by looking at
    # all the rows with the same root_msg_id.
    # to_address looks like this: "u_91:u_1234:u_200"
    # The "u_" prefix is discarded and the rest is a user_id.
    rows = @database.fetch_message_participants(msg_id, @settings.fix_private_messages)

    # The non-destructive flat_map/uniq/map are used on purpose: flatten! returns
    # nil when there is nothing to flatten (no rows), which broke the old chain.
    import_user_ids = rows.flat_map { |r| r[:to_address].split(':') }
      .uniq
      .map { |address| address[2..-1] }

    author = author_id.to_s

    import_user_ids.map do |import_user_id|
      next if import_user_id.to_s == author
      @lookup.find_user_by_import_id(import_user_id).try(:username)
    end.compact
  end
end
# Adds one custom field per voter to +custom_fields+. The field is keyed by
# "<votes field>-<user id>" and holds a hash that maps the default poll name
# to the ids of the options that user voted for.
#
# Votes whose option or user can't be resolved are skipped. A warning is
# logged for them only once per call.
#
# @param custom_fields [Hash] mutated in place
# @param topic_id the id passed to the database to fetch the poll votes
# @param poll [ImportScripts::PhpBB3::Poll]
def add_vote_fields(custom_fields, topic_id, poll)
  rows = @database.fetch_poll_votes(topic_id)
  warned = false

  rows.each do |row|
    option_id = poll.option_id_from_imported_option_id(row[:poll_option_id])
    user_id = @lookup.user_id_from_imported_user_id(row[:user_id])

    if option_id.present? && user_id.present?
      key = "#{@votes_field}-#{user_id}"
      user_votes = (custom_fields[key] ||= {@default_poll_name => []})
      user_votes[@default_poll_name] << option_id
    elsif !warned
      # "else !warned" never tested the flag and the flag was never set,
      # so every invalid vote used to be logged.
      warned = true
      Rails.logger.warn("Topic with id #{topic_id} has invalid votes.")
    end
  end
end
# Completes the hash of a post that starts a topic: category, title (HTML
# entities decoded, stripped, cut to 255 characters), pinning information and,
# when poll import is enabled, the poll.
#
# @param row [Hash] reads :forum_id, :topic_title and :topic_type
# @param mapped [Hash] the partially mapped post; mutated and returned
# @return [Hash]
def map_first_post(row, mapped)
  topic_type = row[:topic_type]
  category_id = @lookup.category_id_from_imported_category_id(row[:forum_id])
  title = CGI.unescapeHTML(row[:topic_title]).strip

  mapped[:category] = category_id
  mapped[:title] = title[0...255]
  # every topic type other than a normal one is pinned
  mapped[:pinned_at] = mapped[:created_at] if topic_type != Constants::POST_NORMAL
  mapped[:pinned_globally] = (topic_type == Constants::POST_GLOBAL)

  add_poll(row, mapped) if @settings.import_polls
  mapped
end
# Maps rows of phpBB's users table, and the names of anonymous post authors,
# to the user hashes consumed by the base importer.
class UserImporter
  # @param avatar_importer [ImportScripts::PhpBB3::AvatarImporter]
  # @param settings [ImportScripts::PhpBB3::Settings]
  def initialize(avatar_importer, settings)
    @avatar_importer = avatar_importer
    @settings = settings
  end

  # Builds the user hash for a registered phpBB user.
  #
  # The returned :post_create_action is a proc that suspends the created user
  # when the row says so and imports the avatar when an avatar type is set.
  #
  # @param row [Hash]
  # @return [Hash]
  def map_user(row)
    is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER
    registered_at = Time.zone.at(row[:user_regdate])

    {
      id: row[:user_id],
      email: row[:user_email],
      username: row[:username],
      name: @settings.username_as_name ? row[:username] : '',
      created_at: registered_at,
      # a last visit of 0 falls back to the registration date
      last_seen_at: row[:user_lastvisit] == 0 ? registered_at : Time.zone.at(row[:user_lastvisit]),
      registration_ip_address: parse_ip_address(row[:user_ip]),
      active: is_active_user,
      trust_level: row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1],
      approved: is_active_user,
      approved_by_id: is_active_user ? Discourse.system_user.id : nil,
      approved_at: is_active_user ? Time.now : nil,
      moderator: row[:group_name] == Constants::GROUP_MODERATORS,
      admin: row[:group_name] == Constants::GROUP_ADMINISTRATORS,
      website: row[:user_website],
      location: row[:user_from],
      date_of_birth: parse_birthdate(row),
      post_create_action: proc do |user|
        suspend_user(user, row)
        @avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present?
      end
    }
  end

  # Builds the user hash for an anonymous poster, identified only by the
  # username stored with the post. The created user is suspended and has
  # all emails disabled.
  #
  # @param row [Hash] reads :post_username and :first_post_time; not modified
  # @return [Hash]
  def map_anonymous_user(row)
    username = row[:post_username]

    {
      id: username,
      email: "anonymous_no_email_#{username}",
      username: username,
      name: '',
      created_at: Time.zone.at(row[:first_post_time]),
      active: true,
      trust_level: TrustLevel[0],
      approved: true,
      approved_by_id: Discourse.system_user.id,
      approved_at: Time.now,
      post_create_action: proc do |user|
        # Work on a copy so the caller's row isn't modified as a side effect.
        suspension = row.merge(
          user_inactive_reason: Constants::INACTIVE_MANUAL,
          ban_reason: 'Anonymous user from phpBB3' # TODO i18n
        )
        suspend_user(user, suspension, true)
      end
    }
  end

  protected

  # @return [IPAddr, nil] nil when the value is blank or not a valid address
  def parse_ip_address(value)
    return nil if value.blank?
    IPAddr.new(value)
  rescue ArgumentError # IPAddr's own error classes descend from ArgumentError
    nil
  end

  # Parses the birthday stored as "%d-%m-%Y"; any spaces are removed first.
  #
  # @return [Date, nil] nil when the value is blank or not a valid date
  def parse_birthdate(row)
    birthday = row[:user_birthday]
    return nil if birthday.blank?
    Date.strptime(birthday.delete(' '), '%d-%m-%Y')
  rescue ArgumentError
    nil
  end

  # Suspends the user if it is currently banned.
  #
  # A manually deactivated account is suspended from now on for 200 years.
  # Otherwise a row with a :ban_start is suspended from :ban_start until
  # :ban_end, or for 200 years when :ban_end is not a positive timestamp.
  # Rows matching neither case leave the user untouched.
  #
  # @param disable_email [Boolean] also switch off all email settings of the user
  def suspend_user(user, row, disable_email = false)
    if row[:user_inactive_reason] == Constants::INACTIVE_MANUAL
      user.suspended_at = Time.now
      user.suspended_till = 200.years.from_now
      ban_reason = row[:ban_reason].blank? ? 'Account deactivated by administrator' : row[:ban_reason] # TODO i18n
    elsif row[:ban_start].present?
      user.suspended_at = Time.zone.at(row[:ban_start])
      # to_i guards against a missing :ban_end, which is treated as "no end"
      user.suspended_till = row[:ban_end].to_i > 0 ? Time.zone.at(row[:ban_end]) : 200.years.from_now
      ban_reason = row[:ban_reason]
    else
      return
    end

    if disable_email
      user.email_digests = false
      user.email_private_messages = false
      user.email_direct = false
      user.email_always = false
    end

    if user.save
      StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
    else
      Rails.logger.error("Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}")
    end
  end
end
+ +database: + type: MySQL # currently only MySQL is supported - more to come soon + host: localhost + username: root + password: + schema: phpbb + table_prefix: phpbb # Usually all table names start with phpbb. Change this, if your forum is using a different prefix. + batch_size: 1000 # Don't change this unless you know what you're doing. The default (1000) should work just fine. + +import: + # Enable this option if you want to have a better conversion of BBCodes to Markdown. + # WARNING: This can slow down your import. + use_bbcode_to_md: false + + # This is the path to the root directory of your current phpBB installation (or a copy of it). + # The importer expects to find the /files and /images directories within the base directory. + # This is only needed if you want to import avatars, attachments or custom smilies. + phpbb_base_dir: /var/www/phpbb + + site_prefix: + # this is needed for rewriting internal links in posts + original: oldsite.example.com/forums # without http(s):// + new: http://discourse.example.com # with http:// or https:// + + avatars: + uploaded: true # import uploaded avatars + gallery: true # import the predefined avatars phpBB offers + remote: false # WARNING: This can considerably slow down your import. It will try to download remote avatars. + + # When true: Anonymous users are imported as suspended users. They can't login and have no email address. + # When false: The system user will be used for all anonymous users. + anonymous_users: true + + # By default all the following things get imported. You can disable them by setting them to false. + bookmarks: true + attachments: true + private_messages: true + polls: true + + # This tries to fix Private Messages that were imported from phpBB2 to phpBB3. + # You should enable this option if you see duplicate messages or lots of related + # messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer' + # should be one topic named 'Importer' and consist of 3 posts). 
# Read-only access to the "import" section of the importer's YAML settings.
# The "database" section is wrapped in a DatabaseSettings object.
class Settings
  # Reads the YAML file +filename+ and wraps its content in a Settings object.
  #
  # @param filename [String]
  # @return [Settings]
  def self.load(filename)
    yaml = YAML::load_file(filename)
    Settings.new(yaml)
  end

  attr_reader :import_anonymous_users
  attr_reader :import_attachments
  attr_reader :import_private_messages
  attr_reader :import_polls
  attr_reader :import_bookmarks

  attr_reader :import_uploaded_avatars
  attr_reader :import_remote_avatars
  attr_reader :import_gallery_avatars

  attr_reader :fix_private_messages
  attr_reader :use_bbcode_to_md

  attr_reader :original_site_prefix
  attr_reader :new_site_prefix
  attr_reader :base_dir

  attr_reader :username_as_name
  attr_reader :emojis

  attr_reader :database

  # @param yaml [Hash] the parsed settings; must contain the 'import' section
  #   with its 'avatars' and 'site_prefix' sub-sections, and 'database'
  def initialize(yaml)
    import_settings = yaml['import']
    @import_anonymous_users = import_settings['anonymous_users']
    @import_attachments = import_settings['attachments']
    @import_private_messages = import_settings['private_messages']
    @import_polls = import_settings['polls']
    @import_bookmarks = import_settings['bookmarks']

    avatar_settings = import_settings['avatars']
    @import_uploaded_avatars = avatar_settings['uploaded']
    @import_remote_avatars = avatar_settings['remote']
    @import_gallery_avatars = avatar_settings['gallery']

    @fix_private_messages = import_settings['fix_private_messages']
    @use_bbcode_to_md = import_settings['use_bbcode_to_md']

    @original_site_prefix = import_settings['site_prefix']['original']
    @new_site_prefix = import_settings['site_prefix']['new']
    @base_dir = import_settings['phpbb_base_dir']

    @username_as_name = import_settings['username_as_name']
    # An "emojis:" key without any mappings is parsed as nil, which fetch with a
    # default would pass through. Always expose an enumerable mapping instead.
    @emojis = import_settings['emojis'] || {}

    @database = DatabaseSettings.new(yaml['database'])
  end
end
# Builds the HTML <img> tag that embeds an uploaded smiley into a post.
# The stated width and height are capped by SiteSetting.max_image_width and
# SiteSetting.max_image_height; a missing dimension falls back to that cap.
#
# NOTE(review): in this copy of the file the markup inside the %Q[] literal was
# lost and only the alt text was left, so the dimensions and the title were
# computed but never used. The tag was rebuilt from those values; the use of
# upload.url for the src attribute is an assumption — confirm against Upload.
#
# @param upload the created upload; read for #url, #width and #height
# @param alt_text [String]
# @param title [String]
# @return [String]
def embedded_image_html(upload, alt_text, title)
  image_width = [upload.width, SiteSetting.max_image_width].compact.min
  image_height = [upload.height, SiteSetting.max_image_height].compact.min
  %Q[<img src="#{upload.url}" width="#{image_width}" height="#{image_height}" alt="#{alt_text}" title="#{title}"/>]
end
# Rewrites links inside +text+ (in place):
# * internal viewtopic.php links are passed to replace_internal_link,
# * remaining HTML anchors are converted to Markdown links,
# * link texts that start with a URL scheme lose the scheme (and "www.").
#
# NOTE(review): the anchor pattern lost its HTML in this copy of the file: it
# had a single group left while the replacement uses \1 (URL) and \2 (text).
# It was rebuilt to match an <a ... href="URL" ...>TEXT</a> element, optionally
# wrapped in the one-letter HTML comment markers phpBB puts around automatically
# detected links — confirm against real post data.
#
# @param text [String] mutated in place
def process_links(text)
  # Internal forum links can have these forms:
  # for topics: viewtopic.php?f=26&t=3412
  # for posts: viewtopic.php?p=1732#p1732
  text.gsub!(@long_internal_link_regexp) do |link|
    replace_internal_link(link, $1, $2)
  end

  # Some links look like this:
  # <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
  text.gsub!(/(?:<!-- \w+ -->)?<a\b[^>]*?\bhref="([^"]+)"[^>]*>(.+?)<\/a>(?:<!-- \w+ -->)?/i, '[\2](\1)')

  # Replace internal forum links that aren't wrapped in an anchor
  text.gsub!(@short_internal_link_regexp) do |link|
    replace_internal_link(link, $1, $2)
  end

  # phpBB shortens link text like this, which breaks our markdown processing:
  # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
  #
  # Work around it for now:
  text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
end
"#{@new_site_prefix}#{topic[:url]}" : link + end + + def process_lists(text) + # convert list tags to ul and list=1 tags to ol + # list=a is not supported, so handle it like list=1 + # list=9 and list=x have the same result as list=1 and list=a + text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]') + text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]') + + # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: + text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]') + end + + # This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse. + # All attachments that haven't been referenced in the text are appended to the end of the text. + def process_attachments(text, attachments) + attachment_regexp = /\[attachment=([\d])+\]([^<]+)\[\/attachment\]?/i + unreferenced_attachments = attachments.dup + + text = text.gsub(attachment_regexp) do + index = $1.to_i + real_filename = $2 + unreferenced_attachments[index] = nil + attachments.fetch(index, real_filename) + end + + unreferenced_attachments = unreferenced_attachments.compact + text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty? + text + end + + def create_internal_link_regexps(original_site_prefix) + host = original_site_prefix.gsub('.', '\.') + link_regex = "http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)" + + @long_internal_link_regexp = Regexp.new(%Q||, Regexp::IGNORECASE) + @short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE) + end + end +end