From 1cb45861c5d1ee5c86b58d950e76d754d8423392 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Sun, 5 Jul 2015 23:17:03 +0200 Subject: [PATCH] FEATURE: Lots of improvements to the phpBB3 importer - Extensive refactoring of the existing importer - Configuration of import with settings.yml instead of editing code - Supports importing from phpBB 3.0.x and 3.1.x - Imports all attachments (not just the ones embedded with [attachment]) from posts and private messages - Imports all existing attachments without the need to configure allowed file extensions or file sizes - Imports polls - Imports bookmarks - Imports sticky topics and (global) announcements as pinned topics - Imports categories in the original order and sets the content of the category description topic - Sets the creation date of category description topics to the creation date of the first topic in each category - Imports additional user attributes: last seen date, registration IP address, website, date of birth, location - Optionally set the user's name to its username - Users that didn't activate their account in phpBB3 are imported as inactive users - All imported, active users are automatically approved - Users that were deactivated in phpBB3 get suspended for 200 years during the import - Anonymous user can be imported as suspended users instead of the system user - Forums of type "link" are not imported as categories anymore - Internal links to posts get rewritten during the import (previously only links to topics got rewritten) - Ordered lists with BBCode [list=a] (which are unsupported in Discourse) get imported as if they would be [list=1] - Importing of avatars, attachments, private messages, polls and bookmarks can be disabled via configuration file - Optional fixing of private messages for forums that have been upgraded from phpBB2 prevents the import of duplicate messages and tries to group related messages into topics - Table prefix (default: phpbb) is configurable - Most of phpBB's 
default smilies are mapped to Emojis and all other smilies get uploaded and embedded as images. Smiley mappings can be added or overridden in the settings.yml file. --- script/import_scripts/phpbb3.rb | 511 +----------------- .../phpbb3/database/database.rb | 56 ++ .../phpbb3/database/database_3_0.rb | 333 ++++++++++++ .../phpbb3/database/database_3_1.rb | 26 + .../phpbb3/database/database_base.rb | 24 + script/import_scripts/phpbb3/importer.rb | 152 ++++++ .../phpbb3/importers/attachment_importer.rb | 36 ++ .../phpbb3/importers/avatar_importer.rb | 107 ++++ .../phpbb3/importers/bookmark_importer.rb | 10 + .../phpbb3/importers/category_importer.rb | 47 ++ .../phpbb3/importers/importer_factory.rb | 69 +++ .../phpbb3/importers/message_importer.rb | 83 +++ .../phpbb3/importers/poll_importer.rb | 155 ++++++ .../phpbb3/importers/post_importer.rb | 79 +++ .../phpbb3/importers/user_importer.rb | 97 ++++ script/import_scripts/phpbb3/settings.yml | 59 ++ .../phpbb3/support/constants.rb | 35 ++ .../import_scripts/phpbb3/support/settings.rb | 78 +++ .../phpbb3/support/smiley_processor.rb | 90 +++ .../phpbb3/support/text_processor.rb | 133 +++++ 20 files changed, 1696 insertions(+), 484 deletions(-) create mode 100644 script/import_scripts/phpbb3/database/database.rb create mode 100644 script/import_scripts/phpbb3/database/database_3_0.rb create mode 100644 script/import_scripts/phpbb3/database/database_3_1.rb create mode 100644 script/import_scripts/phpbb3/database/database_base.rb create mode 100644 script/import_scripts/phpbb3/importer.rb create mode 100644 script/import_scripts/phpbb3/importers/attachment_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/avatar_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/bookmark_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/category_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/importer_factory.rb create mode 100644 
script/import_scripts/phpbb3/importers/message_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/poll_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/post_importer.rb create mode 100644 script/import_scripts/phpbb3/importers/user_importer.rb create mode 100644 script/import_scripts/phpbb3/settings.yml create mode 100644 script/import_scripts/phpbb3/support/constants.rb create mode 100644 script/import_scripts/phpbb3/support/settings.rb create mode 100644 script/import_scripts/phpbb3/support/smiley_processor.rb create mode 100644 script/import_scripts/phpbb3/support/text_processor.rb diff --git a/script/import_scripts/phpbb3.rb b/script/import_scripts/phpbb3.rb index 373db6e755b..639b51d7852 100644 --- a/script/import_scripts/phpbb3.rb +++ b/script/import_scripts/phpbb3.rb @@ -1,486 +1,29 @@ -require "mysql2" -require File.expand_path(File.dirname(__FILE__) + "/base.rb") - -class ImportScripts::PhpBB3 < ImportScripts::Base - - PHPBB_DB = "phpbb" - BATCH_SIZE = 1000 - - ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s):// - NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https:// - - # Set PHPBB_BASE_DIR to the base directory of your phpBB installation. - # When importing, you should place the subdirectories "files" (containing all - # attachments) and "images" (containing avatars) in PHPBB_BASE_DIR. - # If nil, [attachment] tags and avatars won't be processed. - # Edit AUTHORIZED_EXTENSIONS as needed. - # If you used ATTACHMENTS_BASE_DIR before, e.g. ATTACHMENTS_BASE_DIR = '/var/www/phpbb/files/' - # would become PHPBB_BASE_DIR = '/var/www/phpbb' - # now. 
- PHPBB_BASE_DIR = '/var/www/phpbb' - AUTHORIZED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'zip', 'rar', 'pdf'] - - # Avatar types to import.: - # 1 = uploaded avatars (you should probably leave this here) - # 2 = hotlinked avatars - WARNING: this will considerably slow down your import - # if there are many hotlinked avatars and some of them unavailable! - # 3 = galery avatars (the predefined avatars phpBB offers. They will be converted to uploaded avatars) - IMPORT_AVATARS = [1, 3] - - def initialize - super - - @client = Mysql2::Client.new( - host: "localhost", - username: "root", - #password: "password", - database: PHPBB_DB - ) - phpbb_read_config - end - - def execute - import_users - import_categories - import_posts - import_private_messages - import_attachments unless PHPBB_BASE_DIR.nil? - suspend_users - end - - def import_users - puts '', "creating users" - - total_count = mysql_query("SELECT count(*) count - FROM phpbb_users u - JOIN phpbb_groups g ON g.group_id = u.group_id - WHERE g.group_name != 'BOTS' - AND u.user_type != 1;").first['count'] - - batches(BATCH_SIZE) do |offset| - results = mysql_query( - "SELECT user_id id, user_email email, username, user_regdate, group_name, user_avatar_type, user_avatar - FROM phpbb_users u - JOIN phpbb_groups g ON g.group_id = u.group_id - WHERE g.group_name != 'BOTS' - AND u.user_type != 1 - ORDER BY u.user_id ASC - LIMIT #{BATCH_SIZE} - OFFSET #{offset};") - - break if results.size < 1 - - create_users(results, total: total_count, offset: offset) do |user| - { id: user['id'], - email: user['email'], - username: user['username'], - created_at: Time.zone.at(user['user_regdate']), - moderator: user['group_name'] == 'GLOBAL_MODERATORS', - admin: user['group_name'] == 'ADMINISTRATORS', - post_create_action: proc do |newmember| - if not PHPBB_BASE_DIR.nil? and IMPORT_AVATARS.include?(user['user_avatar_type']) and newmember.uploaded_avatar_id.blank? 
- path = phpbb_avatar_fullpath(user['user_avatar_type'], user['user_avatar']) - if path - begin - upload = create_upload(newmember.id, path, user['user_avatar']) - if upload.persisted? - newmember.import_mode = false - newmember.create_user_avatar - newmember.import_mode = true - newmember.user_avatar.update(custom_upload_id: upload.id) - newmember.update(uploaded_avatar_id: upload.id) - else - puts "Error: Upload did not persist!" - end - rescue SystemCallError => err - puts "Could not import avatar: #{err.message}" - end - end - end - end - } - end - end - end - - def import_categories - results = mysql_query(" - SELECT forum_id id, parent_id, left(forum_name, 50) name, forum_desc description - FROM phpbb_forums - ORDER BY parent_id ASC, forum_id ASC - ") - - create_categories(results) do |row| - h = {id: row['id'], name: CGI.unescapeHTML(row['name']), description: CGI.unescapeHTML(row['description'])} - if row['parent_id'].to_i > 0 - h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id']) - end - h - end - end - - def import_posts - puts "", "creating topics and posts" - - total_count = mysql_query("SELECT count(*) count from phpbb_posts").first["count"] - - batches(BATCH_SIZE) do |offset| - results = mysql_query(" - SELECT p.post_id id, - p.topic_id topic_id, - t.forum_id category_id, - t.topic_title title, - t.topic_first_post_id first_post_id, - p.poster_id user_id, - p.post_text raw, - p.post_time post_time - FROM phpbb_posts p, - phpbb_topics t - WHERE p.topic_id = t.topic_id - ORDER BY id - LIMIT #{BATCH_SIZE} - OFFSET #{offset}; - ") - - break if results.size < 1 - - create_posts(results, total: total_count, offset: offset) do |m| - skip = false - mapped = {} - - mapped[:id] = m['id'] - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_phpbb_post(m['raw'], m['id']) - mapped[:created_at] = Time.zone.at(m['post_time']) - - if m['id'] == m['first_post_id'] - mapped[:category] = 
category_id_from_imported_category_id(m['category_id']) - mapped[:title] = CGI.unescapeHTML(m['title']) - else - parent = topic_lookup_from_imported_post_id(m['first_post_id']) - if parent - mapped[:topic_id] = parent[:topic_id] - else - puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}" - skip = true - end - end - - skip ? nil : mapped - end - end - end - - def import_private_messages - puts "", "creating private messages" - - total_count = mysql_query("SELECT count(*) count from phpbb_privmsgs").first["count"] - - batches(BATCH_SIZE) do |offset| - results = mysql_query(" - SELECT msg_id id, - root_level, - author_id user_id, - message_time, - message_subject, - message_text - FROM phpbb_privmsgs - ORDER BY root_level ASC, msg_id ASC - LIMIT #{BATCH_SIZE} - OFFSET #{offset}; - ") - - break if results.size < 1 - - create_posts(results, total: total_count, offset: offset) do |m| - skip = false - mapped = {} - - mapped[:id] = "pm:#{m['id']}" - mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1 - mapped[:raw] = process_phpbb_post(m['message_text'], m['id']) - mapped[:created_at] = Time.zone.at(m['message_time']) - - if m['root_level'] == 0 - mapped[:title] = CGI.unescapeHTML(m['message_subject']) - mapped[:archetype] = Archetype.private_message - - # Find the users who are part of this private message. - # Found from the to_address of phpbb_privmsgs, by looking at - # all the rows with the same root_level. - # to_address looks like this: "u_91:u_1234:u_200" - # The "u_" prefix is discarded and the rest is a user_id. - - import_user_ids = mysql_query(" - SELECT to_address - FROM phpbb_privmsgs - WHERE msg_id = #{m['id']} - OR root_level = #{m['id']}").map { |r| r['to_address'].split(':') }.flatten!.map! { |u| u[2..-1] } - - mapped[:target_usernames] = import_user_ids.map! do |import_user_id| - import_user_id.to_s == m['user_id'].to_s ? 
nil : User.find_by_id(user_id_from_imported_user_id(import_user_id)).try(:username) - end.compact.uniq - - skip = true if mapped[:target_usernames].empty? # pm with yourself? - else - parent = topic_lookup_from_imported_post_id("pm:#{m['root_level']}") - if parent - mapped[:topic_id] = parent[:topic_id] - else - puts "Parent post pm:#{m['root_level']} doesn't exist. Skipping #{m["id"]}: #{m["message_subject"][0..40]}" - skip = true - end - end - - skip ? nil : mapped - end - end - end - - def suspend_users - puts '', "updating banned users" - - where = "ban_userid > 0 AND (ban_end = 0 OR ban_end > #{Time.zone.now.to_i})" - - banned = 0 - failed = 0 - total = mysql_query("SELECT count(*) count FROM phpbb_banlist WHERE #{where}").first['count'] - - system_user = Discourse.system_user - - mysql_query("SELECT ban_userid, ban_start, ban_end, ban_give_reason FROM phpbb_banlist WHERE #{where}").each do |b| - user = find_user_by_import_id(b['ban_userid']) - if user - user.suspended_at = Time.zone.at(b['ban_start']) - user.suspended_till = b['ban_end'] > 0 ? Time.zone.at(b['ban_end']) : 200.years.from_now - - if user.save - StaffActionLogger.new(system_user).log_user_suspend(user, b['ban_give_reason']) - banned += 1 - else - puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}" - failed += 1 - end - else - puts "Not found: #{b['ban_userid']}" - failed += 1 - end - - print_status banned + failed, total - end - end - - def process_phpbb_post(raw, import_id) - s = raw.dup - - # :) is encoded as :) - s.gsub!(/]+) \/>/, '\1') - - # Internal forum links of this form: viewtopic.php?f=26&t=3412 - s.gsub!(/viewtopic(?:.*)t=(\d+)<\/a>/) do |phpbb_link| - replace_internal_link(phpbb_link, $1, import_id) - end - - # Some links look like this: http://www.onegameamonth.com - s.gsub!(/(.+)<\/a>/, '[\2](\1)') - - # Many phpbb bbcode tags have a hash attached to them. 
Examples: - # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky] - # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex] - s.gsub!(/:(?:\w{8})\]/, ']') - - s = CGI.unescapeHTML(s) - - # phpBB shortens link text like this, which breaks our markdown processing: - # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli) - # - # Work around it for now: - s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[') - - # Replace internal forum links that aren't in the format - s.gsub!(internal_url_regexp) do |phpbb_link| - replace_internal_link(phpbb_link, $1, import_id) - end - # convert list tags to ul and list=1 tags to ol - # (basically, we're only missing list=a here...) - s.gsub!(/\[list\](.*?)\[\/list:u\]/m, '[ul]\1[/ul]') - s.gsub!(/\[list=1\](.*?)\[\/list:o\]/m, '[ol]\1[/ol]') - # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists: - s.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]') - - s - end - - def replace_internal_link(phpbb_link, import_topic_id, from_import_post_id) - results = mysql_query("select topic_first_post_id from phpbb_topics where topic_id = #{import_topic_id}") - - return phpbb_link unless results.size > 0 - - linked_topic_id = results.first['topic_first_post_id'] - lookup = topic_lookup_from_imported_post_id(linked_topic_id) - - return phpbb_link unless lookup - - t = Topic.find_by_id(lookup[:topic_id]) - if t - "#{NEW_SITE_PREFIX}/t/#{t.slug}/#{t.id}" - else - phpbb_link - end - end - - def internal_url_regexp - @internal_url_regexp ||= Regexp.new("http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}/viewtopic\\.php?(?:\\S*)t=(\\d+)") - end - - # This step is done separately because it can take multiple attempts to get right (because of - # missing files, wrong paths, authorized extensions, etc.). 
- def import_attachments - setting = AUTHORIZED_EXTENSIONS.join('|') - SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions - - r = /\[attachment=[\d]+\]<\!-- [\w]+ --\>([^<]+)<\!-- [\w]+ --\>\[\/attachment\]/ - - user = Discourse.system_user - - current_count = 0 - total_count = Post.count - success_count = 0 - fail_count = 0 - - puts '', "Importing attachments...", '' - - Post.find_each do |post| - current_count += 1 - print_status current_count, total_count - - new_raw = post.raw.dup - new_raw.gsub!(r) do |s| - matches = r.match(s) - real_filename = matches[1] - - # note: currently, we do not import PM attachments. - # If this should be desired, this has to be fixed, - # otherwise, the SQL state coughs up an error for the - # clause "WHERE post_msg_id = pm12345"... - next s if post.custom_fields['import_id'].start_with?('pm:') - - sql = "SELECT physical_filename, - mimetype - FROM phpbb_attachments - WHERE post_msg_id = #{post.custom_fields['import_id']} - AND real_filename = '#{real_filename}';" - - begin - results = mysql_query(sql) - rescue Mysql2::Error => e - puts "SQL Error" - puts e.message - puts sql - fail_count += 1 - next s - end - - row = results.first - if !row - puts "Couldn't find phpbb_attachments record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}, real_filename = #{real_filename}" - fail_count += 1 - next s - end - - filename = File.join(PHPBB_BASE_DIR+'/files', row['physical_filename']) - if !File.exists?(filename) - puts "Attachment file doesn't exist: #{filename}" - fail_count += 1 - next s - end - - upload = create_upload(user.id, filename, real_filename) - - if upload.nil? || !upload.valid? 
- puts "Upload not valid :(" - puts upload.errors.inspect if upload - fail_count += 1 - next s - end - - success_count += 1 - - html_for_upload(upload, real_filename) - end - - if new_raw != post.raw - PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: 'Migrate from PHPBB3' }) - end - end - - puts '', '' - puts "succeeded: #{success_count}" - puts " failed: #{fail_count}" if fail_count > 0 - puts '' - end - - # Read avatar config from phpBB configuration table. - # Stored there: - paths relative to the phpBB install path - # - "salt", i.e. base filename for uploaded avatars - # - def phpbb_read_config - results = mysql_query("SELECT config_name, config_value - FROM phpbb_config;") - if results.size<1 - puts "could not read config... no avatars and attachments will be imported!" - return - end - results.each do |result| - if result['config_name']=='avatar_gallery_path' - @avatar_gallery_path = result['config_value'] - elsif result['config_name']=='avatar_path' - @avatar_path = result['config_value'] - elsif result['config_name']=='avatar_salt' - @avatar_salt = result['config_value'] - end - end - end - - # Create the full path to the phpBB avatar specified by avatar_type and filename. - # - def phpbb_avatar_fullpath(avatar_type, filename) - case avatar_type - when 1 # uploaded avatar - filename.gsub!(/_[0-9]+\./,'.') # we need 1337.jpg, not 1337_2983745.jpg - path=@avatar_path - PHPBB_BASE_DIR+'/'+path+'/'+@avatar_salt+'_'+filename - when 3 # gallery avatar - path=@avatar_gallery_path - PHPBB_BASE_DIR+'/'+path+'/'+filename - when 2 # hotlinked avatar - begin - hotlinked = FileHelper.download(filename, SiteSetting.max_image_size_kb.kilobytes, "discourse-hotlinked") - rescue StandardError => err - puts "Error downloading avatar: #{err.message}. Skipping..." 
- return nil - end - if hotlinked - if hotlinked.size <= SiteSetting.max_image_size_kb.kilobytes - return hotlinked - else - Rails.logger.error("Failed to pull hotlinked image: #{filename} - Image is bigger than #{@max_size}") - nil - end - else - Rails.logger.error("There was an error while downloading '#{filename}' locally.") - nil - end - else - puts 'Invalid avatar type #{avatar_type}, skipping' - nil - end - end - - - def mysql_query(sql) - @client.query(sql, cache_rows: false) - end +if ARGV.length != 1 || !File.exist?(ARGV[0]) + STDERR.puts '', 'Usage of phpBB3 importer:', 'bundle exec ruby phpbb3.rb <path/to/settings.yml>' + STDERR.puts '', "Use the settings file from #{File.expand_path('phpbb3/settings.yml', File.dirname(__FILE__))} as an example." + exit 1 end -ImportScripts::PhpBB3.new.perform +module ImportScripts + module PhpBB3 + require_relative 'phpbb3/support/settings' + require_relative 'phpbb3/database/database' + + @settings = Settings.load(ARGV[0]) + + # We need to load the gem files for ruby-bbcode-to-md and the database adapter + # (e.g. mysql2) before bundler gets initialized by the base importer. + # Otherwise we get an error since those gems are not always in the Gemfile. 
+ require 'ruby-bbcode-to-md' if @settings.use_bbcode_to_md + + begin + @database = Database.create(@settings.database) + rescue UnsupportedVersionError => error + STDERR.puts '', error.message + exit 1 + end + + require_relative 'phpbb3/importer' + Importer.new(@settings, @database).perform + end +end diff --git a/script/import_scripts/phpbb3/database/database.rb b/script/import_scripts/phpbb3/database/database.rb new file mode 100644 index 00000000000..731f05b8a12 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database.rb @@ -0,0 +1,56 @@ +require 'mysql2' + +module ImportScripts::PhpBB3 + class Database + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def self.create(database_settings) + Database.new(database_settings).create_database + end + + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def initialize(database_settings) + @database_settings = database_settings + @database_client = create_database_client + end + + # @return [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + def create_database + version = get_phpbb_version + + if version.start_with?('3.0') + require_relative 'database_3_0' + Database_3_0.new(@database_client, @database_settings) + elsif version.start_with?('3.1') + require_relative 'database_3_1' + Database_3_1.new(@database_client, @database_settings) + else + raise UnsupportedVersionError, "Unsupported version (#{version}) of phpBB detected.\n" \ + << 'Currently only 3.0.x and 3.1.x are supported by this importer.' 
+ end + end + + protected + + def create_database_client + Mysql2::Client.new( + host: @database_settings.host, + username: @database_settings.username, + password: @database_settings.password, + database: @database_settings.schema + ) + end + + def get_phpbb_version + table_prefix = @database_settings.table_prefix + + @database_client.query(<<-SQL, cache_rows: false, symbolize_keys: true).first[:config_value] + SELECT config_value + FROM #{table_prefix}_config + WHERE config_name = 'version' + SQL + end + end + + class UnsupportedVersionError < RuntimeError; + end +end diff --git a/script/import_scripts/phpbb3/database/database_3_0.rb b/script/import_scripts/phpbb3/database/database_3_0.rb new file mode 100644 index 00000000000..d4115cc38e1 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database_3_0.rb @@ -0,0 +1,333 @@ +require_relative 'database_base' +require_relative '../support/constants' + +module ImportScripts::PhpBB3 + class Database_3_0 < DatabaseBase + def count_users + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_users u + JOIN #{@table_prefix}_groups g ON g.group_id = u.group_id + WHERE u.user_type != #{Constants::USER_TYPE_IGNORE} + SQL + end + + def fetch_users(offset) + query(<<-SQL) + SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip, + u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason, + u.user_posts, u.user_website, u.user_from, u.user_birthday, u.user_avatar_type, u.user_avatar + FROM #{@table_prefix}_users u + JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id) + LEFT OUTER JOIN #{@table_prefix}_banlist b ON ( + u.user_id = b.ban_userid AND b.ban_exclude = 0 AND + (b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP()) + ) + WHERE u.user_type != #{Constants::USER_TYPE_IGNORE} + ORDER BY u.user_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def count_anonymous_users + count(<<-SQL) + SELECT COUNT(DISTINCT post_username) 
AS count + FROM #{@table_prefix}_posts + WHERE post_username <> '' + SQL + end + + def fetch_anonymous_users(offset) + query(<<-SQL) + SELECT post_username, MIN(post_time) AS first_post_time + FROM #{@table_prefix}_posts + WHERE post_username <> '' + GROUP BY post_username + ORDER BY post_username ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def fetch_categories + query(<<-SQL) + SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_desc, x.first_post_time + FROM #{@table_prefix}_forums f + LEFT OUTER JOIN ( + SELECT MIN(topic_time) AS first_post_time, forum_id + FROM #{@table_prefix}_topics + GROUP BY forum_id + ) x ON (f.forum_id = x.forum_id) + WHERE f.forum_type != #{Constants::FORUM_TYPE_LINK} + ORDER BY f.parent_id ASC, f.left_id ASC + SQL + end + + def count_posts + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_posts + SQL + end + + def fetch_posts(offset) + query(<<-SQL) + SELECT p.post_id, p.topic_id, t.forum_id, t.topic_title, t.topic_first_post_id, p.poster_id, + p.post_text, p.post_time, p.post_username, t.topic_status, t.topic_type, t.poll_title, + CASE WHEN t.poll_length > 0 THEN t.poll_start + t.poll_length ELSE NULL END AS poll_end, + t.poll_max_options, p.post_attachment + FROM #{@table_prefix}_posts p + JOIN #{@table_prefix}_topics t ON (p.topic_id = t.topic_id) + ORDER BY p.post_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def get_first_post_id(topic_id) + query(<<-SQL).first[:topic_first_post_id] + SELECT topic_first_post_id + FROM #{@table_prefix}_topics + WHERE topic_id = #{topic_id} + SQL + end + + def fetch_poll_options(topic_id) + query(<<-SQL) + SELECT poll_option_id, poll_option_text, poll_option_total + FROM #{@table_prefix}_poll_options + WHERE topic_id = #{topic_id} + ORDER BY poll_option_id + SQL + end + + def fetch_poll_votes(topic_id) + # this query ignores votes from users that do not exist anymore + query(<<-SQL) + SELECT u.user_id, v.poll_option_id + FROM #{@table_prefix}_poll_votes 
v + JOIN #{@table_prefix}_users u ON (v.vote_user_id = u.user_id) + WHERE v.topic_id = #{topic_id} + SQL + end + + def count_voters(topic_id) + # anonymous voters can't be counted, but lets try to make the count look "correct" anyway + count(<<-SQL) + SELECT MAX(count) AS count + FROM ( + SELECT COUNT(DISTINCT vote_user_id) AS count + FROM #{@table_prefix}_poll_votes + WHERE topic_id = #{topic_id} + UNION + SELECT MAX(poll_option_total) AS count + FROM #{@table_prefix}_poll_options + WHERE topic_id = #{topic_id} + ) x + SQL + end + + def get_max_attachment_size + query(<<-SQL).first[:filesize] + SELECT IFNULL(MAX(filesize), 0) AS filesize + FROM #{@table_prefix}_attachments + SQL + end + + def fetch_attachments(topic_id, post_id) + query(<<-SQL) + SELECT physical_filename, real_filename + FROM #{@table_prefix}_attachments + WHERE topic_id = #{topic_id} AND post_msg_id = #{post_id} + ORDER BY filetime DESC, post_msg_id ASC + SQL + end + + def count_messages(use_fixed_messages) + if use_fixed_messages + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_import_privmsgs + SQL + else + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_privmsgs + SQL + end + end + + def fetch_messages(use_fixed_messages, offset) + if use_fixed_messages + query(<<-SQL) + SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text, + IFNULL(a.attachment_count, 0) AS attachment_count + FROM #{@table_prefix}_privmsgs m + JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) + LEFT OUTER JOIN ( + SELECT post_msg_id, COUNT(*) AS attachment_count + FROM #{@table_prefix}_attachments + WHERE topic_id = 0 + GROUP BY post_msg_id + ) a ON (m.msg_id = a.post_msg_id) + ORDER BY i.root_msg_id ASC, m.msg_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + else + query(<<-SQL) + SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject, + m.message_text, IFNULL(a.attachment_count, 0) AS 
attachment_count + FROM #{@table_prefix}_privmsgs m + LEFT OUTER JOIN ( + SELECT post_msg_id, COUNT(*) AS attachment_count + FROM #{@table_prefix}_attachments + WHERE topic_id = 0 + GROUP BY post_msg_id + ) a ON (m.msg_id = a.post_msg_id) + ORDER BY m.root_level ASC, m.msg_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + end + + def fetch_message_participants(msg_id, use_fixed_messages) + if use_fixed_messages + query(<<-SQL) + SELECT m.to_address + FROM #{@table_prefix}_privmsgs m + JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) + WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id} + SQL + else + query(<<-SQL) + SELECT m.to_address + FROM #{@table_prefix}_privmsgs m + WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id} + SQL + end + end + + def calculate_fixed_messages + drop_temp_import_message_table + create_temp_import_message_table + fill_temp_import_message_table + + drop_import_message_table + create_import_message_table + fill_import_message_table + + drop_temp_import_message_table + end + + def count_bookmarks + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_bookmarks + SQL + end + + def fetch_bookmarks(offset) + query(<<-SQL) + SELECT b.user_id, t.topic_first_post_id + FROM #{@table_prefix}_bookmarks b + JOIN #{@table_prefix}_topics t ON (b.topic_id = t.topic_id) + ORDER BY b.user_id ASC, b.topic_id ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + + def get_config_values + query(<<-SQL).first + SELECT + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'version') AS phpbb_version, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_gallery_path') AS avatar_gallery_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_path') AS avatar_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'avatar_salt') AS avatar_salt, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name 
= 'smilies_path') AS smilies_path, + (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path + SQL + end + + protected + + def drop_temp_import_message_table + query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs_temp") + end + + def create_temp_import_message_table + query(<<-SQL) + CREATE TABLE #{@table_prefix}_import_privmsgs_temp ( + msg_id MEDIUMINT(8) NOT NULL, + root_msg_id MEDIUMINT(8) NOT NULL, + recipient_id MEDIUMINT(8), + normalized_subject VARCHAR(255) NOT NULL, + PRIMARY KEY (msg_id) + ) + SQL + end + + # this removes duplicate messages, converts the to_address to a number + # and stores the message_subject in lowercase and without the prefix "Re: " + def fill_temp_import_message_table + query(<<-SQL) + INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject) + SELECT m.msg_id, m.root_level, + CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN + CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER) + ELSE NULL END AS recipient_id, + LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN + SUBSTRING(m.message_subject, 5) + ELSE m.message_subject END) AS normalized_subject + FROM #{@table_prefix}_privmsgs m + WHERE NOT EXISTS ( + SELECT 1 + FROM #{@table_prefix}_privmsgs x + WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id + AND x.to_address = m.to_address AND x.message_time = m.message_time + ) + SQL + end + + def drop_import_message_table + query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs") + end + + def create_import_message_table + query(<<-SQL) + CREATE TABLE #{@table_prefix}_import_privmsgs ( + msg_id MEDIUMINT(8) NOT NULL, + root_msg_id MEDIUMINT(8) NOT NULL, + PRIMARY KEY (msg_id), + INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id) + ) + SQL + end + + # this tries to calculate the actual root_level (= msg_id of the first message in a + # private conversation) based on 
subject, time, author and recipient + def fill_import_message_table + query(<<-SQL) + INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id) + SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN + COALESCE(( + SELECT a.msg_id + FROM #{@table_prefix}_privmsgs a + JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id) + WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR + (a.author_id = i.recipient_id AND b.recipient_id = m.author_id)) + AND b.normalized_subject = i.normalized_subject + AND a.msg_id <> m.msg_id + AND a.message_time < m.message_time + ORDER BY a.message_time ASC + LIMIT 1 + ), 0) ELSE i.root_msg_id END AS root_msg_id + FROM #{@table_prefix}_privmsgs m + JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id) + SQL + end + end +end diff --git a/script/import_scripts/phpbb3/database/database_3_1.rb b/script/import_scripts/phpbb3/database/database_3_1.rb new file mode 100644 index 00000000000..bf13546e2d0 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database_3_1.rb @@ -0,0 +1,26 @@ +require_relative 'database_3_0' +require_relative '../support/constants' + +module ImportScripts::PhpBB3 + class Database_3_1 < Database_3_0 + def fetch_users(offset) + query(<<-SQL) + SELECT u.user_id, u.user_email, u.username, u.user_regdate, u.user_lastvisit, u.user_ip, + u.user_type, u.user_inactive_reason, g.group_name, b.ban_start, b.ban_end, b.ban_reason, + u.user_posts, f.pf_phpbb_website AS user_website, f.pf_phpbb_location AS user_from, + u.user_birthday, u.user_avatar_type, u.user_avatar + FROM #{@table_prefix}_users u + LEFT OUTER JOIN #{@table_prefix}_profile_fields_data f ON (u.user_id = f.user_id) + JOIN #{@table_prefix}_groups g ON (g.group_id = u.group_id) + LEFT OUTER JOIN #{@table_prefix}_banlist b ON ( + u.user_id = b.ban_userid AND b.ban_exclude = 0 AND + (b.ban_end = 0 OR b.ban_end >= UNIX_TIMESTAMP()) + ) + WHERE u.user_type != #{Constants::USER_TYPE_IGNORE} + ORDER BY u.user_id 
ASC + LIMIT #{@batch_size} + OFFSET #{offset} + SQL + end + end +end diff --git a/script/import_scripts/phpbb3/database/database_base.rb b/script/import_scripts/phpbb3/database/database_base.rb new file mode 100644 index 00000000000..3c8b4b37181 --- /dev/null +++ b/script/import_scripts/phpbb3/database/database_base.rb @@ -0,0 +1,24 @@ +module ImportScripts::PhpBB3 + class DatabaseBase + # @param database_client [Mysql2::Client] + # @param database_settings [ImportScripts::PhpBB3::DatabaseSettings] + def initialize(database_client, database_settings) + @database_client = database_client + + @batch_size = database_settings.batch_size + @table_prefix = database_settings.table_prefix + end + + protected + + # Executes a database query. + def query(sql) + @database_client.query(sql, cache_rows: false, symbolize_keys: true) + end + + # Executes a database query and returns the value of the 'count' column. + def count(sql) + query(sql).first[:count] + end + end +end diff --git a/script/import_scripts/phpbb3/importer.rb b/script/import_scripts/phpbb3/importer.rb new file mode 100644 index 00000000000..28c2ed6fa63 --- /dev/null +++ b/script/import_scripts/phpbb3/importer.rb @@ -0,0 +1,152 @@ +require_relative '../base' +require_relative 'support/settings' +require_relative 'database/database' +require_relative 'importers/importer_factory' + +module ImportScripts::PhpBB3 + class Importer < ImportScripts::Base + # @param settings [ImportScripts::PhpBB3::Settings] + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + def initialize(settings, database) + @settings = settings + super() + + @database = database + @php_config = database.get_config_values + @importers = ImporterFactory.new(@database, @lookup, @uploader, @settings, @php_config) + end + + def perform + super if settings_check_successful? 
+ end + + protected + + def execute + puts '', "importing from phpBB #{@php_config[:phpbb_version]}" + + import_users + import_anonymous_users if @settings.import_anonymous_users + import_categories + import_posts + import_private_messages if @settings.import_private_messages + import_bookmarks if @settings.import_bookmarks + end + + def get_site_settings_for_import + settings = super + + max_file_size_kb = @database.get_max_attachment_size + settings[:max_image_size_kb] = [max_file_size_kb, SiteSetting.max_image_size_kb].max + settings[:max_attachment_size_kb] = [max_file_size_kb, SiteSetting.max_attachment_size_kb].max + + settings + end + + def settings_check_successful? + true + end + + def import_users + puts '', 'creating users' + total_count = @database.count_users + importer = @importers.user_importer + + batches do |offset| + rows = @database.fetch_users(offset) + break if rows.size < 1 + + create_users(rows, total: total_count, offset: offset) do |row| + importer.map_user(row) + end + end + end + + def import_anonymous_users + puts '', 'creating anonymous users' + total_count = @database.count_anonymous_users + importer = @importers.user_importer + + batches do |offset| + rows = @database.fetch_anonymous_users(offset) + break if rows.size < 1 + + create_users(rows, total: total_count, offset: offset) do |row| + importer.map_anonymous_user(row) + end + end + end + + def import_categories + puts '', 'creating categories' + rows = @database.fetch_categories + importer = @importers.category_importer + + create_categories(rows) do |row| + importer.map_category(row) + end + end + + def import_posts + puts '', 'creating topics and posts' + total_count = @database.count_posts + importer = @importers.post_importer + + batches do |offset| + rows = @database.fetch_posts(offset) + break if rows.size < 1 + + create_posts(rows, total: total_count, offset: offset) do |row| + importer.map_post(row) + end + end + end + + def import_private_messages + if 
@settings.fix_private_messages + puts '', 'fixing private messages' + @database.calculate_fixed_messages + end + + puts '', 'creating private messages' + total_count = @database.count_messages(@settings.fix_private_messages) + importer = @importers.message_importer + + batches do |offset| + rows = @database.fetch_messages(@settings.fix_private_messages, offset) + break if rows.size < 1 + + create_posts(rows, total: total_count, offset: offset) do |row| + importer.map_message(row) + end + end + end + + def import_bookmarks + puts '', 'creating bookmarks' + total_count = @database.count_bookmarks + importer = @importers.bookmark_importer + + batches do |offset| + rows = @database.fetch_bookmarks(offset) + break if rows.size < 1 + + create_bookmarks(rows, total: total_count, offset: offset) do |row| + importer.map_bookmark(row) + end + end + end + + def update_last_seen_at + # no need for this since the importer sets last_seen_at for each user during the import + end + + def use_bbcode_to_md? 
+ @settings.use_bbcode_to_md + end + + def batches + super(@settings.database.batch_size) + end + end +end diff --git a/script/import_scripts/phpbb3/importers/attachment_importer.rb b/script/import_scripts/phpbb3/importers/attachment_importer.rb new file mode 100644 index 00000000000..e41ca7a1209 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/attachment_importer.rb @@ -0,0 +1,36 @@ +module ImportScripts::PhpBB3 + class AttachmentImporter + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(database, uploader, settings, phpbb_config) + @database = database + @uploader = uploader + + @attachment_path = File.join(settings.base_dir, phpbb_config[:attachment_path]) + end + + def import_attachments(user_id, post_id, topic_id = 0) + rows = @database.fetch_attachments(topic_id, post_id) + return nil if rows.size < 1 + + attachments = [] + + rows.each do |row| + path = File.join(@attachment_path, row[:physical_filename]) + filename = CGI.unescapeHTML(row[:real_filename]) + upload = @uploader.create_upload(user_id, path, filename) + + if upload.nil? || !upload.valid? 
+ puts "Failed to upload #{path}" + puts upload.errors.inspect if upload + else + attachments << @uploader.html_for_upload(upload, filename) + end + end + + attachments + end + end +end diff --git a/script/import_scripts/phpbb3/importers/avatar_importer.rb b/script/import_scripts/phpbb3/importers/avatar_importer.rb new file mode 100644 index 00000000000..3db8b701004 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/avatar_importer.rb @@ -0,0 +1,107 @@ +module ImportScripts::PhpBB3 + class AvatarImporter + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(uploader, settings, phpbb_config) + @uploader = uploader + @settings = settings + + @uploaded_avatar_path = File.join(settings.base_dir, phpbb_config[:avatar_path]) + @gallery_path = File.join(settings.base_dir, phpbb_config[:avatar_gallery_path]) + @avatar_salt = phpbb_config[:avatar_salt] + end + + def import_avatar(user, row) + avatar_type = row[:user_avatar_type] + return unless is_avatar_importable?(user, avatar_type) + + filename = row[:user_avatar] + path = get_avatar_path(avatar_type, filename) + return if path.nil? + + begin + filename = "avatar#{File.extname(path)}" + upload = @uploader.create_upload(user.id, path, filename) + + if upload.persisted? + user.import_mode = false + user.create_user_avatar + user.import_mode = true + user.user_avatar.update(custom_upload_id: upload.id) + user.update(uploaded_avatar_id: upload.id) + else + Rails.logger.error("Could not persist avatar for user #{user.username}") + end + rescue SystemCallError => err + Rails.logger.error("Could not import avatar for user #{user.username}: #{err.message}") + end + end + + protected + + def is_avatar_importable?(user, avatar_type) + is_allowed_avatar_type?(avatar_type) && user.uploaded_avatar_id.blank? 
+ end + + def get_avatar_path(avatar_type, filename) + case avatar_type + when Constants::AVATAR_TYPE_UPLOADED then + filename.gsub!(/_[0-9]+\./, '.') # we need 1337.jpg, not 1337_2983745.jpg + get_uploaded_path(filename) + when Constants::AVATAR_TYPE_GALLERY then + get_gallery_path(filename) + when Constants::AVATAR_TYPE_REMOTE then + download_avatar(filename) + else + Rails.logger.error("Invalid avatar type #{avatar_type}. Skipping...") + nil + end + end + + # Tries to download the remote avatar. + def download_avatar(url) + max_image_size_kb = SiteSetting.max_image_size_kb.kilobytes + + begin + avatar_file = FileHelper.download(url, max_image_size_kb, 'discourse-avatar') + rescue StandardError => err + warn "Error downloading avatar: #{err.message}. Skipping..." + return nil + end + + if avatar_file + if avatar_file.size <= max_image_size_kb + return avatar_file + else + Rails.logger.error("Failed to download remote avatar: #{url} - Image is larger than #{max_image_size_kb} KB") + return nil + end + end + + Rails.logger.error("There was an error while downloading '#{url}' locally.") + nil + end + + def get_uploaded_path(filename) + File.join(@uploaded_avatar_path, "#{@avatar_salt}_#{filename}") + end + + def get_gallery_path(filename) + File.join(@gallery_path, filename) + end + + def is_allowed_avatar_type?(avatar_type) + case avatar_type + when Constants::AVATAR_TYPE_UPLOADED then + @settings.import_uploaded_avatars + when Constants::AVATAR_TYPE_REMOTE then + @settings.import_remote_avatars + when Constants::AVATAR_TYPE_GALLERY then + @settings.import_gallery_avatars + else + false + end + end + end +end diff --git a/script/import_scripts/phpbb3/importers/bookmark_importer.rb b/script/import_scripts/phpbb3/importers/bookmark_importer.rb new file mode 100644 index 00000000000..febc8ab8697 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/bookmark_importer.rb @@ -0,0 +1,10 @@ +module ImportScripts::PhpBB3 + class BookmarkImporter + def map_bookmark(row) 
+ { + user_id: row[:user_id], + post_id: row[:topic_first_post_id] + } + end + end +end diff --git a/script/import_scripts/phpbb3/importers/category_importer.rb b/script/import_scripts/phpbb3/importers/category_importer.rb new file mode 100644 index 00000000000..65eeb4097e6 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/category_importer.rb @@ -0,0 +1,47 @@ +module ImportScripts::PhpBB3 + class CategoryImporter + # @param lookup [ImportScripts::LookupContainer] + # @param text_processor [ImportScripts::PhpBB3::TextProcessor] + def initialize(lookup, text_processor) + @lookup = lookup + @text_processor = text_processor + end + + def map_category(row) + { + id: row[:forum_id], + name: CGI.unescapeHTML(row[:forum_name]), + parent_category_id: @lookup.category_id_from_imported_category_id(row[:parent_id]), + post_create_action: proc do |category| + update_category_description(category, row) + end + } + end + + protected + + # @param category [Category] + def update_category_description(category, row) + return if row[:forum_desc].blank? && row[:first_post_time].blank? + + topic = category.topic + post = topic.first_post + + if row[:first_post_time].present? + created_at = Time.zone.at(row[:first_post_time]) + + topic.created_at = created_at + topic.save + + post.created_at = created_at + post.save + end + + if row[:forum_desc].present? 
+ changes = {raw: @text_processor.process_raw_text(row[:forum_desc])} + opts = {revised_at: post.created_at, bypass_bump: true} + post.revise(Discourse.system_user, changes, opts) + end + end + end +end diff --git a/script/import_scripts/phpbb3/importers/importer_factory.rb b/script/import_scripts/phpbb3/importers/importer_factory.rb new file mode 100644 index 00000000000..4b793a153ae --- /dev/null +++ b/script/import_scripts/phpbb3/importers/importer_factory.rb @@ -0,0 +1,69 @@ +require_relative 'attachment_importer' +require_relative 'avatar_importer' +require_relative 'bookmark_importer' +require_relative 'category_importer' +require_relative 'message_importer' +require_relative 'poll_importer' +require_relative 'post_importer' +require_relative 'user_importer' +require_relative '../support/smiley_processor' +require_relative '../support/text_processor' + +module ImportScripts::PhpBB3 + class ImporterFactory + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + # @param lookup [ImportScripts::LookupContainer] + # @param uploader [ImportScripts::Uploader] + # @param settings [ImportScripts::PhpBB3::Settings] + # @param phpbb_config [Hash] + def initialize(database, lookup, uploader, settings, phpbb_config) + @database = database + @lookup = lookup + @uploader = uploader + @settings = settings + @phpbb_config = phpbb_config + end + + def user_importer + UserImporter.new(avatar_importer, @settings) + end + + def category_importer + CategoryImporter.new(@lookup, text_processor) + end + + def post_importer + PostImporter.new(@lookup, text_processor, attachment_importer, poll_importer, @settings) + end + + def message_importer + MessageImporter.new(@database, @lookup, text_processor, attachment_importer, @settings) + end + + def bookmark_importer + BookmarkImporter.new + end + + protected + + def attachment_importer + AttachmentImporter.new(@database, @uploader, @settings, @phpbb_config) + end + + def avatar_importer + 
AvatarImporter.new(@uploader, @settings, @phpbb_config) + end + + def poll_importer + PollImporter.new(@lookup, @database, text_processor) + end + + def text_processor + @text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings) + end + + def smiley_processor + SmileyProcessor.new(@uploader, @settings, @phpbb_config) + end + end +end diff --git a/script/import_scripts/phpbb3/importers/message_importer.rb b/script/import_scripts/phpbb3/importers/message_importer.rb new file mode 100644 index 00000000000..6200b0b0230 --- /dev/null +++ b/script/import_scripts/phpbb3/importers/message_importer.rb @@ -0,0 +1,83 @@ +module ImportScripts::PhpBB3 + class MessageImporter + # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] + # @param lookup [ImportScripts::LookupContainer] + # @param text_processor [ImportScripts::PhpBB3::TextProcessor] + # @param attachment_importer [ImportScripts::PhpBB3::AttachmentImporter] + # @param settings [ImportScripts::PhpBB3::Settings] + def initialize(database, lookup, text_processor, attachment_importer, settings) + @database = database + @lookup = lookup + @text_processor = text_processor + @attachment_importer = attachment_importer + @settings = settings + end + + def map_message(row) + user_id = @lookup.user_id_from_imported_user_id(row[:author_id]) || Discourse.system_user.id + attachments = import_attachments(row, user_id) + + mapped = { + id: "pm:#{row[:msg_id]}", + user_id: user_id, + created_at: Time.zone.at(row[:message_time]), + raw: @text_processor.process_private_msg(row[:message_text], attachments) + } + + if row[:root_msg_id] == 0 + map_first_message(row, mapped) + else + map_other_message(row, mapped) + end + end + + protected + + def import_attachments(row, user_id) + if @settings.import_attachments && row[:attachment_count] > 0 + @attachment_importer.import_attachments(user_id, row[:msg_id]) + end + end + + def map_first_message(row, mapped) + mapped[:title] 
= CGI.unescapeHTML(row[:message_subject]) + mapped[:archetype] = Archetype.private_message + mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id]) + + if mapped[:target_usernames].empty? # pm with yourself? + puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}" + return nil + end + + mapped + end + + def map_other_message(row, mapped) + parent_msg_id = "pm:#{row[:root_msg_id]}" + parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id) + + if parent.blank? + puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}" + return nil + end + + mapped[:topic_id] = parent[:topic_id] + mapped + end + + def get_usernames(msg_id, author_id) + # Find the users who are part of this private message. + # Found from the to_address of phpbb_privmsgs, by looking at + # all the rows with the same root_msg_id. + # to_address looks like this: "u_91:u_1234:u_200" + # The "u_" prefix is discarded and the rest is a user_id. + import_user_ids = @database.fetch_message_participants(msg_id, @settings.fix_private_messages) + .map { |r| r[:to_address].split(':') } + .flatten!.uniq.map! { |u| u[2..-1] } + + import_user_ids.map! do |import_user_id| + import_user_id.to_s == author_id.to_s ? 
module ImportScripts::PhpBB3
  # Converts a phpBB poll (options, vote counts and individual votes) into the
  # raw text and the post custom fields used by the Discourse poll plugin.
  class PollImporter
    POLL_PLUGIN_NAME = 'poll'

    # @param lookup [ImportScripts::LookupContainer]
    # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
    # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
    def initialize(lookup, database, text_processor)
      @lookup = lookup
      @database = database
      @text_processor = text_processor

      # the constants of the poll plugin are read from the singleton class of the plugin instance
      poll_plugin = Discourse.plugins.find { |p| p.metadata.name == POLL_PLUGIN_NAME }.singleton_class
      @default_poll_name = poll_plugin.const_get(:DEFAULT_POLL_NAME)
      @polls_field = poll_plugin.const_get(:POLLS_CUSTOM_FIELD)
      @votes_field = poll_plugin.const_get(:VOTES_CUSTOM_FIELD)
    end

    # Builds the poll text and the custom fields for the poll of a topic.
    #
    # @param topic_id [Integer] id of the phpBB topic
    # @param poll [ImportScripts::PhpBB3::Poll]
    # @return [Hash, nil] hash with :raw and :custom_fields, or nil when the
    #   poll plugin didn't extract a default poll from the generated text
    def map_poll(topic_id, poll)
      options = get_poll_options(topic_id)
      poll_text = get_poll_text(options, poll)
      extracted_poll = extract_default_poll(topic_id, poll_text)
      return nil if extracted_poll.nil?

      update_poll(extracted_poll, options, topic_id, poll)

      mapped_poll = {
        raw: poll_text,
        custom_fields: {}
      }

      add_polls_field(mapped_poll[:custom_fields], extracted_poll)
      add_vote_fields(mapped_poll[:custom_fields], topic_id, poll)
      mapped_poll
    end

    protected

    # Loads the poll options of a topic. Options with identical text are merged
    # into one option that remembers all original ids and the sum of all votes.
    #
    # @return [Array<Hash>] hashes with the keys :ids, :text and :votes
    def get_poll_options(topic_id)
      rows = @database.fetch_poll_options(topic_id)
      options_by_text = {}

      rows.each do |row|
        option_text = @text_processor.process_raw_text(row[:poll_option_text]).delete("\n")

        if options_by_text.key?(option_text)
          # phpBB allows duplicate options (why?!) - we need to merge them
          option = options_by_text[option_text]
          option[:ids] << row[:poll_option_id]
          option[:votes] += row[:poll_option_total]
        else
          options_by_text[option_text] = {
            ids: [row[:poll_option_id]],
            text: option_text,
            votes: row[:poll_option_total]
          }
        end
      end

      options_by_text.values
    end

    # Creates the poll markup: the title followed by a [poll] block with one
    # list item per option.
    #
    # @param options [Array]
    # @param poll [ImportScripts::PhpBB3::Poll]
    def get_poll_text(options, poll)
      poll_text = "#{poll.title}\n"

      if poll.max_options > 1
        poll_text << "[poll type=multiple max=#{poll.max_options}]"
      else
        poll_text << '[poll]'
      end

      options.each do |option|
        poll_text << "\n- #{option[:text]}"
      end

      poll_text << "\n[/poll]"
    end

    # Lets the poll plugin parse the poll text.
    #
    # @return the extracted poll with the default poll name, or nil if there is none
    def extract_default_poll(topic_id, poll_text)
      extracted_polls = DiscoursePoll::Poll::extract(poll_text, topic_id)
      extracted_polls.find { |poll| poll['name'] == @default_poll_name }
    end

    # Copies voter count, status and vote counts into the extracted poll and
    # remembers which phpBB option ids belong to which Discourse option id.
    #
    # @param poll [ImportScripts::PhpBB3::Poll]
    def update_poll(default_poll, imported_options, topic_id, poll)
      default_poll['voters'] = @database.count_voters(topic_id) # this includes anonymous voters
      default_poll['status'] = poll.has_ended? ? :closed : :open

      default_poll['options'].each_with_index do |option, index|
        imported_option = imported_options[index]
        next if imported_option.nil? # no phpBB option at this position

        option['votes'] = imported_option[:votes]
        poll.add_option_id(imported_option[:ids], option['id'])
      end
    end

    def add_polls_field(custom_fields, default_poll)
      custom_fields[@polls_field] = {@default_poll_name => default_poll}
    end

    # Adds one custom field per voter that lists the chosen option ids.
    # Votes for unknown options or from users that weren't imported are
    # skipped; this is logged once per topic.
    #
    # @param custom_fields [Hash]
    # @param poll [ImportScripts::PhpBB3::Poll]
    def add_vote_fields(custom_fields, topic_id, poll)
      rows = @database.fetch_poll_votes(topic_id)
      warned = false

      rows.each do |row|
        option_id = poll.option_id_from_imported_option_id(row[:poll_option_id])
        user_id = @lookup.user_id_from_imported_user_id(row[:user_id])

        if option_id.present? && user_id.present?
          key = "#{@votes_field}-#{user_id}"

          if custom_fields.key?(key)
            votes = custom_fields[key][@default_poll_name]
          else
            votes = []
            custom_fields[key] = {@default_poll_name => votes}
          end

          votes << option_id
        elsif !warned
          warned = true
          Rails.logger.warn("Topic with id #{topic_id} has invalid votes.")
        end
      end
    end
  end

  # Holds the poll attributes of a phpBB topic and the mapping of phpBB
  # option ids to the option ids generated by the Discourse poll plugin.
  class Poll
    attr_reader :title
    attr_reader :max_options

    # @param end_timestamp [Integer, nil] Unix timestamp at which the poll ends; nil if it never ends
    def initialize(title, max_options, end_timestamp)
      @title = title
      @max_options = max_options
      @end_timestamp = end_timestamp
      @option_ids = {}
    end

    # @return [Boolean] true when the poll has an end time that is not in the
    #   future; a poll without end time never ends
    def has_ended?
      !@end_timestamp.nil? && Time.zone.at(@end_timestamp) <= Time.now
    end

    # Remembers that the given phpBB option ids are represented by the Discourse option id.
    def add_option_id(imported_ids, option_id)
      imported_ids.each { |imported_id| @option_ids[imported_id] = option_id }
    end

    # @return the Discourse option id or nil if the phpBB option id is unknown
    def option_id_from_imported_option_id(imported_id)
      @option_ids[imported_id]
    end
  end
end
row[:poster_id] : row[:post_username] + user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || Discourse.system_user.id + is_first_post = row[:post_id] == row[:topic_first_post_id] + + attachments = import_attachments(row, user_id) + + mapped = { + id: row[:post_id], + user_id: user_id, + created_at: Time.zone.at(row[:post_time]), + raw: @text_processor.process_post(row[:post_text], attachments) + } + + if is_first_post + map_first_post(row, mapped) + else + map_other_post(row, mapped) + end + end + + protected + + def import_attachments(row, user_id) + if @settings.import_attachments && row[:post_attachment] > 0 + @attachment_importer.import_attachments(user_id, row[:post_id], row[:topic_id]) + end + end + + def map_first_post(row, mapped) + mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id]) + mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255] + mapped[:pinned_at] = mapped[:created_at] unless row[:topic_type] == Constants::POST_NORMAL + mapped[:pinned_globally] = row[:topic_type] == Constants::POST_GLOBAL + + add_poll(row, mapped) if @settings.import_polls + mapped + end + + def map_other_post(row, mapped) + parent = @lookup.topic_lookup_from_imported_post_id(row[:topic_first_post_id]) + + if parent.blank? + puts "Parent post #{row[:topic_first_post_id]} doesn't exist. Skipping #{row[:post_id]}: #{row[:topic_title][0..40]}" + return nil + end + + mapped[:topic_id] = parent[:topic_id] + mapped + end + + def add_poll(row, mapped_post) + return if row[:poll_title].blank? + + poll = Poll.new(row[:poll_title], row[:poll_max_options], row[:poll_end]) + mapped_poll = @poll_importer.map_poll(row[:topic_id], poll) + + if mapped_poll.present? 
+ mapped_post[:raw] = mapped_poll[:raw] << "\n" << mapped_post[:raw] + mapped_post[:custom_fields] = mapped_poll[:custom_fields] + end + end + end +end diff --git a/script/import_scripts/phpbb3/importers/user_importer.rb b/script/import_scripts/phpbb3/importers/user_importer.rb new file mode 100644 index 00000000000..0a9ba8c45be --- /dev/null +++ b/script/import_scripts/phpbb3/importers/user_importer.rb @@ -0,0 +1,97 @@ +require_relative '../support/constants' + +module ImportScripts::PhpBB3 + class UserImporter + # @param avatar_importer [ImportScripts::PhpBB3::AvatarImporter] + # @param settings [ImportScripts::PhpBB3::Settings] + def initialize(avatar_importer, settings) + @avatar_importer = avatar_importer + @settings = settings + end + + def map_user(row) + is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER + + { + id: row[:user_id], + email: row[:user_email], + username: row[:username], + name: @settings.username_as_name ? row[:username] : '', + created_at: Time.zone.at(row[:user_regdate]), + last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]), + registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil), + active: is_active_user, + trust_level: row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1], + approved: is_active_user, + approved_by_id: is_active_user ? Discourse.system_user.id : nil, + approved_at: is_active_user ? Time.now : nil, + moderator: row[:group_name] == Constants::GROUP_MODERATORS, + admin: row[:group_name] == Constants::GROUP_ADMINISTRATORS, + website: row[:user_website], + location: row[:user_from], + date_of_birth: parse_birthdate(row), + post_create_action: proc do |user| + suspend_user(user, row) + @avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present? 
+ end + } + end + + def map_anonymous_user(row) + username = row[:post_username] + + { + id: username, + email: "anonymous_no_email_#{username}", + username: username, + name: '', + created_at: Time.zone.at(row[:first_post_time]), + active: true, + trust_level: TrustLevel[0], + approved: true, + approved_by_id: Discourse.system_user.id, + approved_at: Time.now, + post_create_action: proc do |user| + row[:user_inactive_reason] = Constants::INACTIVE_MANUAL + row[:ban_reason] = 'Anonymous user from phpBB3' # TODO i18n + suspend_user(user, row, true) + end + } + end + + protected + + def parse_birthdate(row) + return nil if row[:user_birthday].blank? + Date.strptime(row[:user_birthday].delete(' '), '%d-%m-%Y') rescue nil + end + + # Suspends the user if it is currently banned. + def suspend_user(user, row, disable_email = false) + if row[:user_inactive_reason] == Constants::INACTIVE_MANUAL + user.suspended_at = Time.now + user.suspended_till = 200.years.from_now + ban_reason = row[:ban_reason].blank? ? 'Account deactivated by administrator' : row[:ban_reason] # TODO i18n + elsif row[:ban_start].present? + user.suspended_at = Time.zone.at(row[:ban_start]) + user.suspended_till = row[:ban_end] > 0 ? Time.zone.at(row[:ban_end]) : 200.years.from_now + ban_reason = row[:ban_reason] + else + return + end + + if disable_email + user.email_digests = false + user.email_private_messages = false + user.email_direct = false + user.email_always = false + end + + if user.save + StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason) + else + Rails.logger.error("Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}") + end + end + end +end diff --git a/script/import_scripts/phpbb3/settings.yml b/script/import_scripts/phpbb3/settings.yml new file mode 100644 index 00000000000..b591d39646b --- /dev/null +++ b/script/import_scripts/phpbb3/settings.yml @@ -0,0 +1,59 @@ +# This is an example settings file for the phpBB3 importer. 
+ +database: + type: MySQL # currently only MySQL is supported - more to come soon + host: localhost + username: root + password: + schema: phpbb + table_prefix: phpbb # Usually all table names start with phpbb. Change this, if your forum is using a different prefix. + batch_size: 1000 # Don't change this unless you know what you're doing. The default (1000) should work just fine. + +import: + # Enable this option if you want to have a better conversion of BBCodes to Markdown. + # WARNING: This can slow down your import. + use_bbcode_to_md: false + + # This is the path to the root directory of your current phpBB installation (or a copy of it). + # The importer expects to find the /files and /images directories within the base directory. + # This is only needed if you want to import avatars, attachments or custom smilies. + phpbb_base_dir: /var/www/phpbb + + site_prefix: + # this is needed for rewriting internal links in posts + original: oldsite.example.com/forums # without http(s):// + new: http://discourse.example.com # with http:// or https:// + + avatars: + uploaded: true # import uploaded avatars + gallery: true # import the predefined avatars phpBB offers + remote: false # WARNING: This can considerably slow down your import. It will try to download remote avatars. + + # When true: Anonymous users are imported as suspended users. They can't login and have no email address. + # When false: The system user will be used for all anonymous users. + anonymous_users: true + + # By default all the following things get imported. You can disable them by setting them to false. + bookmarks: true + attachments: true + private_messages: true + polls: true + + # This tries to fix Private Messages that were imported from phpBB2 to phpBB3. + # You should enable this option if you see duplicate messages or lots of related + # messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer' + # should be one topic named 'Importer' and consist of 3 posts). 
+  fix_private_messages: false
+
+  # When true: each imported user will have the original username from phpBB as its name
+  # When false: the name of each user will be blank
+  username_as_name: false
+
+  # Map Emojis to smilies used in phpBB. Most of the default smilies already have a mapping, but you can override
+  # the mappings here, if you don't like some of them.
+  # The mapping syntax is: emoji_name: 'smiley_in_phpbb'
+  # Or map multiple smilies to one Emoji: emoji_name: ['smiley1', 'smiley2']
+  emojis:
+    # here are two example mappings...
+    smiley: [':D', ':-D', ':grin:']
+    heart: ':love:'
diff --git a/script/import_scripts/phpbb3/support/constants.rb b/script/import_scripts/phpbb3/support/constants.rb
new file mode 100644
index 00000000000..af7482d5da6
--- /dev/null
+++ b/script/import_scripts/phpbb3/support/constants.rb
@@ -0,0 +1,36 @@
+module ImportScripts::PhpBB3
+  # Named constants for values used by phpBB3 (user, group, avatar, forum, topic and post types).
+  class Constants
+    ACTIVE_USER = 0
+    INACTIVE_REGISTER = 1 # Newly registered account
+    INACTIVE_PROFILE = 2 # Profile details changed
+    INACTIVE_MANUAL = 3 # Account deactivated by administrator
+    INACTIVE_REMIND = 4 # Forced user account reactivation
+
+    GROUP_ADMINISTRATORS = 'ADMINISTRATORS'
+    GROUP_MODERATORS = 'GLOBAL_MODERATORS'
+
+    # https://wiki.phpbb.com/Table.phpbb_users
+    USER_TYPE_NORMAL = 0
+    USER_TYPE_INACTIVE = 1
+    USER_TYPE_IGNORE = 2
+    USER_TYPE_FOUNDER = 3
+
+    AVATAR_TYPE_UPLOADED = 1
+    AVATAR_TYPE_REMOTE = 2
+    AVATAR_TYPE_GALLERY = 3
+
+    FORUM_TYPE_CATEGORY = 0
+    FORUM_TYPE_POST = 1
+    FORUM_TYPE_LINK = 2
+
+    TOPIC_UNLOCKED = 0
+    TOPIC_LOCKED = 1
+    TOPIC_MOVED = 2
+
+    POST_NORMAL = 0
+    POST_STICKY = 1
+    POST_ANNOUNCE = 2
+    POST_GLOBAL = 3
+  end
+end
diff --git a/script/import_scripts/phpbb3/support/settings.rb b/script/import_scripts/phpbb3/support/settings.rb
new file mode 100644
index 00000000000..8a0c36ee199
--- /dev/null
+++ b/script/import_scripts/phpbb3/support/settings.rb
@@ -0,0 +1,89 @@
+require 'yaml'
+
+module ImportScripts::PhpBB3
+  # Read-only view of the importer configuration loaded from a YAML file.
+  class Settings
+    # Parses the YAML file and wraps the result in a Settings object.
+    #
+    # @param filename [String] path to the YAML configuration file
+    # @return [ImportScripts::PhpBB3::Settings]
+    def self.load(filename)
+      yaml = YAML.load_file(filename)
+      Settings.new(yaml)
+    end
+
+    attr_reader :import_anonymous_users
+    attr_reader :import_attachments
+    attr_reader :import_private_messages
+    attr_reader :import_polls
+    attr_reader :import_bookmarks
+
+    attr_reader :import_uploaded_avatars
+    attr_reader :import_remote_avatars
+    attr_reader :import_gallery_avatars
+
+    attr_reader :fix_private_messages
+    attr_reader :use_bbcode_to_md
+
+    attr_reader :original_site_prefix
+    attr_reader :new_site_prefix
+    attr_reader :base_dir
+
+    attr_reader :username_as_name
+    attr_reader :emojis
+
+    attr_reader :database
+
+    # @param yaml [Hash] parsed configuration with the top-level keys 'import' and 'database'
+    def initialize(yaml)
+      import_settings = yaml['import']
+      @import_anonymous_users = import_settings['anonymous_users']
+      @import_attachments = import_settings['attachments']
+      @import_private_messages = import_settings['private_messages']
+      @import_polls = import_settings['polls']
+      @import_bookmarks = import_settings['bookmarks']
+
+      avatar_settings = import_settings['avatars']
+      @import_uploaded_avatars = avatar_settings['uploaded']
+      @import_remote_avatars = avatar_settings['remote']
+      @import_gallery_avatars = avatar_settings['gallery']
+
+      @fix_private_messages = import_settings['fix_private_messages']
+      @use_bbcode_to_md = import_settings['use_bbcode_to_md']
+
+      @original_site_prefix = import_settings['site_prefix']['original']
+      @new_site_prefix = import_settings['site_prefix']['new']
+      @base_dir = import_settings['phpbb_base_dir']
+
+      @username_as_name = import_settings['username_as_name']
+
+      # The mapping is iterated as emoji => smilies pairs, so default to a Hash. Using `||` instead of
+      # `fetch` also covers an `emojis:` key whose entries are all commented out (YAML parses that as nil).
+      @emojis = import_settings['emojis'] || {}
+
+      @database = DatabaseSettings.new(yaml['database'])
+    end
+  end
+
+  # Values read from the 'database' section of the configuration.
+  class DatabaseSettings
+    attr_reader :type
+    attr_reader :host
+    attr_reader :username
+    attr_reader :password
+    attr_reader :schema
+    attr_reader :table_prefix
+    attr_reader :batch_size
+
+    # @param yaml [Hash] the 'database' section of the parsed configuration
+    def initialize(yaml)
+      @type = yaml['type']
+      @host = yaml['host']
+      @username = yaml['username']
+      @password = yaml['password']
+      @schema = yaml['schema']
+      @table_prefix = yaml['table_prefix']
+      @batch_size = yaml['batch_size']
+    end
+  end
+end
diff --git a/script/import_scripts/phpbb3/support/smiley_processor.rb b/script/import_scripts/phpbb3/support/smiley_processor.rb
new file mode 100644
index 00000000000..f79a24c4659
--- /dev/null
+++ b/script/import_scripts/phpbb3/support/smiley_processor.rb
@@ -0,0 +1,108 @@
+module ImportScripts::PhpBB3
+  # Replaces the smilies phpBB stores in post text with Emoji codes or uploaded images.
+  class SmileyProcessor
+    # @param uploader [ImportScripts::Uploader]
+    # @param settings [ImportScripts::PhpBB3::Settings]
+    # @param phpbb_config [Hash]
+    def initialize(uploader, settings, phpbb_config)
+      @uploader = uploader
+      @smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])
+
+      # smiley code => replacement text; configured mappings are added last so they win over defaults
+      @smiley_map = {}
+      add_default_smilies
+      add_configured_smilies(settings.emojis)
+    end
+
+    # Replaces every smiley in +text+ in place.
+    #
+    # @param text [String] post text; modified in place
+    def replace_smilies(text)
+      # :) is encoded as :)
+      #
+      # NOTE(review): the pattern below has a single capture group although the block reads $2, $3 and $4,
+      # and the example above shows no markup. The HTML phpBB wraps around smilies appears to be missing
+      # from this copy of the patch -- restore the pattern from the original commit before using it.
+      text.gsub!(/(.*?)/) do
+        smiley = $1
+
+        # Known smilies come from the map; unknown ones are uploaded once (or kept as text) and cached.
+        @smiley_map.fetch(smiley) do
+          upload_smiley(smiley, $2, $3, $4) || smiley_as_text(smiley)
+        end
+      end
+    end
+
+    protected
+
+    # Registers the built-in mapping of phpBB smiley codes to Emoji codes.
+    def add_default_smilies
+      {
+        [':D', ':-D', ':grin:'] => ':smiley:',
+        [':)', ':-)', ':smile:'] => ':smile:',
+        [';)', ';-)', ':wink:'] => ':wink:',
+        [':(', ':-(', ':sad:'] => ':frowning:',
+        [':o', ':-o', ':eek:'] => ':astonished:',
+        [':shock:'] => ':open_mouth:',
+        [':?', ':-?', ':???:'] => ':confused:',
+        ['8-)', ':cool:'] => ':sunglasses:',
+        [':lol:'] => ':laughing:',
+        [':x', ':-x', ':mad:'] => ':angry:',
+        [':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
+        [':oops:'] => ':blush:',
+        [':cry:'] => ':cry:',
+        [':evil:'] => ':imp:',
+        [':twisted:'] => ':smiling_imp:',
+        [':roll:'] => ':unamused:',
+        [':!:'] => ':exclamation:',
+        [':?:'] => ':question:',
+        [':idea:'] => ':bulb:',
+        [':arrow:'] => ':arrow_right:',
+        [':|', ':-|'] => ':neutral_face:'
+      }.each do |smilies, emoji|
+        smilies.each { |smiley| @smiley_map[smiley] = emoji }
+      end
+    end
+
+    # @param emojis [Hash] emoji name => smiley code or Array of smiley codes
+    def add_configured_smilies(emojis)
+      emojis.each do |emoji, smilies|
+        Array.wrap(smilies)
+          .each { |smiley| @smiley_map[smiley] = ":#{emoji}:" }
+      end
+    end
+
+    # Uploads the smiley image and caches the resulting HTML in the smiley map.
+    #
+    # @return [String, nil] HTML for the uploaded image, or nil when the upload failed
+    def upload_smiley(smiley, path, alt_text, title)
+      path = File.join(@smilies_path, path)
+      filename = File.basename(path)
+      upload = @uploader.create_upload(Discourse::SYSTEM_USER_ID, path, filename)
+
+      if upload.nil? || !upload.valid?
+        puts "Failed to upload #{path}"
+        puts upload.errors.inspect if upload
+        html = nil
+      else
+        html = embedded_image_html(upload, alt_text, title)
+        @smiley_map[smiley] = html
+      end
+
+      html
+    end
+
+    # NOTE(review): the width and height computed here are unused and the returned string contains only
+    # the alt text; the image markup appears to be missing from this copy of the patch.
+    def embedded_image_html(upload, alt_text, title)
+      image_width = [upload.width, SiteSetting.max_image_width].compact.min
+      image_height = [upload.height, SiteSetting.max_image_height].compact.min
+      %Q[#{alt_text}]
+    end
+
+    # Caches and returns the smiley code itself so that it stays in the text unchanged.
+    def smiley_as_text(smiley)
+      @smiley_map[smiley] = smiley
+    end
+  end
+end
diff --git a/script/import_scripts/phpbb3/support/text_processor.rb b/script/import_scripts/phpbb3/support/text_processor.rb
new file mode 100644
index 00000000000..c0e99e4dd2a
--- /dev/null
+++ b/script/import_scripts/phpbb3/support/text_processor.rb
@@ -0,0 +1,154 @@
+module ImportScripts::PhpBB3
+  # Converts raw phpBB post text into text suitable for the import.
+  class TextProcessor
+    # @param lookup [ImportScripts::LookupContainer]
+    # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
+    # @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
+    # @param settings [ImportScripts::PhpBB3::Settings]
+    def initialize(lookup, database, smiley_processor, settings)
+      @lookup = lookup
+      @database = database
+      @smiley_processor = smiley_processor
+
+      @new_site_prefix = settings.new_site_prefix
+      create_internal_link_regexps(settings.original_site_prefix)
+    end
+
+    # Unescapes HTML entities, then rewrites BBCode hashes, smilies, links and lists.
+    #
+    # @param raw [String] text as stored by phpBB; not modified
+    # @return [String] the processed copy
+    def process_raw_text(raw)
+      text = raw.dup
+      text = CGI.unescapeHTML(text)
+
+      clean_bbcodes(text)
+      process_smilies(text)
+      process_links(text)
+      process_lists(text)
+
+      text
+    end
+
+    # @param raw [String]
+    # @param attachments [Array, nil] replacement text per attachment, indexed like [attachment=N]
+    # @return [String]
+    def process_post(raw, attachments)
+      text = process_raw_text(raw)
+      text = process_attachments(text, attachments) if attachments.present?
+      text
+    end
+
+    # Private messages are processed exactly like posts.
+    def process_private_msg(raw, attachments)
+      process_post(raw, attachments)
+    end
+
+    protected
+
+    def clean_bbcodes(text)
+      # Many phpbb bbcode tags have a hash attached to them. Examples:
+      # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky]
+      # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex]
+      text.gsub!(/:(?:\w{8})\]/, ']')
+    end
+
+    def process_smilies(text)
+      @smiley_processor.replace_smilies(text)
+    end
+
+    def process_links(text)
+      # Internal forum links can have these forms:
+      # for topics: viewtopic.php?f=26&t=3412
+      # for posts: viewtopic.php?p=1732#p1732
+      text.gsub!(@long_internal_link_regexp) do |link|
+        replace_internal_link(link, $1, $2)
+      end
+
+      # Some links look like this: http://www.onegameamonth.com
+      #
+      # NOTE(review): the replacement uses \2 but the pattern has only one capture group; the anchor
+      # markup appears to be missing from this copy of the patch -- restore it from the original commit.
+      text.gsub!(/(.+)<\/a>/i, '[\2](\1)')
+
+      # Replace internal forum links that aren't in the format
+      text.gsub!(@short_internal_link_regexp) do |link|
+        replace_internal_link(link, $1, $2)
+      end
+
+      # phpBB shortens link text like this, which breaks our markdown processing:
+      # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
+      #
+      # Work around it for now:
+      text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
+    end
+
+    # Returns the new URL for an internal link, or +link+ unchanged when it cannot be resolved.
+    def replace_internal_link(link, import_topic_id, import_post_id)
+      if import_post_id.nil?
+        replace_internal_topic_link(link, import_topic_id)
+      else
+        replace_internal_post_link(link, import_post_id)
+      end
+    end
+
+    def replace_internal_topic_link(link, import_topic_id)
+      import_post_id = @database.get_first_post_id(import_topic_id)
+      return link if import_post_id.nil?
+
+      replace_internal_post_link(link, import_post_id)
+    end
+
+    def replace_internal_post_link(link, import_post_id)
+      topic = @lookup.topic_lookup_from_imported_post_id(import_post_id)
+      topic ? "#{@new_site_prefix}#{topic[:url]}" : link
+    end
+
+    def process_lists(text)
+      # convert list tags to ul and list=1 tags to ol
+      # list=a is not supported, so handle it like list=1
+      # list=9 and list=x have the same result as list=1 and list=a
+      text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
+      text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
+
+      # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
+      text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
+    end
+
+    # This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse.
+    # All attachments that haven't been referenced in the text are appended to the end of the text.
+    def process_attachments(text, attachments)
+      # The whole index has to be captured: with `([\d])+` only the last digit ended up in $1, so
+      # [attachment=12] was resolved as attachment 2.
+      #
+      # NOTE(review): `[^<]+` suggests the filename is followed by markup in the stored text; verify the
+      # pattern against real post data.
+      attachment_regexp = /\[attachment=(\d+)\]([^<]+)\[\/attachment\]?/i
+      unreferenced_attachments = attachments.dup
+
+      text = text.gsub(attachment_regexp) do
+        index = $1.to_i
+        real_filename = $2
+        unreferenced_attachments[index] = nil
+        attachments.fetch(index, real_filename)
+      end
+
+      unreferenced_attachments = unreferenced_attachments.compact
+      text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
+      text
+    end
+
+    # Builds the patterns used to find links that point to the original forum.
+    def create_internal_link_regexps(original_site_prefix)
+      # Escape all regexp metacharacters of the prefix, not just dots.
+      host = Regexp.escape(original_site_prefix)
+      link_regex = "http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)"
+
+      # NOTE(review): this pattern is empty, and an empty regexp matches at every position. The markup
+      # around link_regex appears to be missing from this copy of the patch -- restore it from the
+      # original commit.
+      @long_internal_link_regexp = Regexp.new(%Q||, Regexp::IGNORECASE)
+      @short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE)
+    end
+  end
+end