diff --git a/script/import_scripts/phpbb3/database/database_3_0.rb b/script/import_scripts/phpbb3/database/database_3_0.rb index f6ebc9223e4..94ec4ca50f0 100644 --- a/script/import_scripts/phpbb3/database/database_3_0.rb +++ b/script/import_scripts/phpbb3/database/database_3_0.rb @@ -161,82 +161,39 @@ module ImportScripts::PhpBB3 SQL end - def count_messages(use_fixed_messages) - if use_fixed_messages - count(<<-SQL) - SELECT COUNT(*) AS count - FROM #{@table_prefix}_import_privmsgs - SQL - else - count(<<-SQL) - SELECT COUNT(*) AS count - FROM #{@table_prefix}_privmsgs - SQL - end + def count_messages + count(<<-SQL) + SELECT COUNT(*) AS count + FROM #{@table_prefix}_privmsgs m + WHERE NOT EXISTS ( -- ignore duplicate messages + SELECT 1 + FROM #{@table_prefix}_privmsgs x + WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id + AND x.to_address = m.to_address AND x.message_time = m.message_time + ) + SQL end - def fetch_messages(use_fixed_messages, last_msg_id) - if use_fixed_messages - query(<<-SQL, :msg_id) - SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text, - IFNULL(a.attachment_count, 0) AS attachment_count - FROM #{@table_prefix}_privmsgs m - JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) - LEFT OUTER JOIN ( - SELECT post_msg_id, COUNT(*) AS attachment_count - FROM #{@table_prefix}_attachments - WHERE topic_id = 0 - GROUP BY post_msg_id - ) a ON (m.msg_id = a.post_msg_id) - WHERE m.msg_id > #{last_msg_id} - ORDER BY i.root_msg_id, m.msg_id - LIMIT #{@batch_size} - SQL - else - query(<<-SQL, :msg_id) - SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject, - m.message_text, IFNULL(a.attachment_count, 0) AS attachment_count - FROM #{@table_prefix}_privmsgs m - LEFT OUTER JOIN ( - SELECT post_msg_id, COUNT(*) AS attachment_count - FROM #{@table_prefix}_attachments - WHERE topic_id = 0 - GROUP BY post_msg_id - ) a ON (m.msg_id = a.post_msg_id) - WHERE m.msg_id > #{last_msg_id} - ORDER BY m.root_level, m.msg_id - LIMIT #{@batch_size} - SQL - end - end - - def fetch_message_participants(msg_id, use_fixed_messages) - if use_fixed_messages - query(<<-SQL) - SELECT m.to_address - FROM #{@table_prefix}_privmsgs m - JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) - WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id} - SQL - else - query(<<-SQL) - SELECT m.to_address - FROM #{@table_prefix}_privmsgs m - WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id} - SQL - end - end - - def calculate_fixed_messages - drop_temp_import_message_table - create_temp_import_message_table - fill_temp_import_message_table - - drop_import_message_table - create_import_message_table - fill_import_message_table - - drop_temp_import_message_table + def fetch_messages(last_msg_id) + query(<<-SQL, :msg_id) + SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject, + m.message_text, m.to_address, r.author_id AS root_author_id, r.to_address AS root_to_address, ( + SELECT COUNT(*) + FROM #{@table_prefix}_attachments a + WHERE a.topic_id = 0 AND m.msg_id = a.post_msg_id + ) AS attachment_count + FROM #{@table_prefix}_privmsgs m + LEFT OUTER JOIN #{@table_prefix}_privmsgs r ON (m.root_level = r.msg_id) + WHERE m.msg_id > #{last_msg_id} + AND NOT EXISTS ( -- ignore duplicate messages + SELECT 1 + FROM #{@table_prefix}_privmsgs x + WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id + AND x.to_address = m.to_address AND x.message_time = m.message_time + ) + ORDER BY m.msg_id + LIMIT #{@batch_size} + SQL end def count_bookmarks @@ -268,83 +225,5 @@ module ImportScripts::PhpBB3 (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path SQL end - - protected - - def drop_temp_import_message_table - query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs_temp") - end - - def create_temp_import_message_table - query(<<-SQL) - CREATE TABLE #{@table_prefix}_import_privmsgs_temp ( - msg_id MEDIUMINT(8) NOT NULL, - root_msg_id MEDIUMINT(8) NOT NULL, - recipient_id MEDIUMINT(8), - normalized_subject VARCHAR(255) NOT NULL, - PRIMARY KEY (msg_id) - ) - SQL - end - - # this removes duplicate messages, converts the to_address to a number - # and stores the message_subject in lowercase and without the prefix "Re: " - def fill_temp_import_message_table - query(<<-SQL) - INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject) - SELECT m.msg_id, m.root_level, - CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN - CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER) - ELSE NULL END AS recipient_id, - LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN - SUBSTRING(m.message_subject, 5) - ELSE m.message_subject END) AS normalized_subject - FROM #{@table_prefix}_privmsgs m - WHERE NOT EXISTS ( - SELECT 1 - FROM #{@table_prefix}_privmsgs x - WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id - AND x.to_address = m.to_address AND x.message_time = m.message_time - ) - SQL - end - - def drop_import_message_table - query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs") - end - - def create_import_message_table - query(<<-SQL) - CREATE TABLE #{@table_prefix}_import_privmsgs ( - msg_id MEDIUMINT(8) NOT NULL, - root_msg_id MEDIUMINT(8) NOT NULL, - PRIMARY KEY (msg_id), - INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id) - ) - SQL - end - - # this tries to calculate the actual root_level (= msg_id of the first message in a - # private conversation) based on subject, time, author and recipient - def fill_import_message_table - query(<<-SQL) - INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id) - SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN - COALESCE(( - SELECT a.msg_id - FROM #{@table_prefix}_privmsgs a - JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id) - WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR - (a.author_id = i.recipient_id AND b.recipient_id = m.author_id)) - AND b.normalized_subject = i.normalized_subject - AND a.msg_id <> m.msg_id - AND a.message_time < m.message_time - ORDER BY a.message_time - LIMIT 1 - ), 0) ELSE i.root_msg_id END AS root_msg_id - FROM #{@table_prefix}_privmsgs m - JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id) - SQL - end end end diff --git a/script/import_scripts/phpbb3/importer.rb b/script/import_scripts/phpbb3/importer.rb index b0c46c870ae..b9daaa15687 100644 --- a/script/import_scripts/phpbb3/importer.rb +++ b/script/import_scripts/phpbb3/importer.rb @@ -118,18 +118,13 @@ module ImportScripts::PhpBB3 end def import_private_messages - if @settings.fix_private_messages - puts '', 'fixing private messages' - @database.calculate_fixed_messages - end - puts '', 'creating private messages' - total_count = @database.count_messages(@settings.fix_private_messages) + total_count = @database.count_messages importer = @importers.message_importer last_msg_id = 0 batches do |offset| - rows, last_msg_id = @database.fetch_messages(@settings.fix_private_messages, last_msg_id) + rows, last_msg_id = @database.fetch_messages(last_msg_id) break if rows.size < 1 next if all_records_exist?(:posts, importer.map_to_import_ids(rows)) diff --git a/script/import_scripts/phpbb3/importers/message_importer.rb b/script/import_scripts/phpbb3/importers/message_importer.rb index 0ebab7d242a..c164806b657 100644 --- a/script/import_scripts/phpbb3/importers/message_importer.rb +++ b/script/import_scripts/phpbb3/importers/message_importer.rb @@ -14,7 +14,7 @@ module ImportScripts::PhpBB3 end def map_to_import_ids(rows) - rows.map { |row| get_import_id(row) } + rows.map { |row| get_import_id(row[:msg_id]) } end @@ -23,31 +23,38 @@ module ImportScripts::PhpBB3 attachments = import_attachments(row, user_id) mapped = { - id: get_import_id(row), + id: get_import_id(row[:msg_id]), user_id: user_id, created_at: Time.zone.at(row[:message_time]), raw: @text_processor.process_private_msg(row[:message_text], attachments) } - if row[:root_msg_id] == 0 - map_first_message(row, mapped) + root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address]) + current_user_ids = sorted_user_ids(row[:author_id], row[:to_address]) + topic_id = get_topic_id(row, root_user_ids, current_user_ids) + + if topic_id.blank? + map_first_message(row, current_user_ids, mapped) else - map_other_message(row, mapped) + map_other_message(row, topic_id, mapped) end end protected + RE_PREFIX = 're: ' + def import_attachments(row, user_id) if @settings.import_attachments && row[:attachment_count] > 0 @attachment_importer.import_attachments(user_id, row[:msg_id]) end end - def map_first_message(row, mapped) - mapped[:title] = CGI.unescapeHTML(row[:message_subject]) + def map_first_message(row, current_user_ids, mapped) + mapped[:title] = get_topic_title(row) mapped[:archetype] = Archetype.private_message - mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id]) + mapped[:target_usernames] = get_recipient_usernames(row) + mapped[:custom_fields] = {import_user_ids: current_user_ids.join(',')} if mapped[:target_usernames].empty? # pm with yourself? puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}" @@ -57,36 +64,73 @@ module ImportScripts::PhpBB3 mapped end - def map_other_message(row, mapped) - parent_msg_id = "pm:#{row[:root_msg_id]}" - parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id) - - if parent.blank? - puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}" - return nil - end - - mapped[:topic_id] = parent[:topic_id] + def map_other_message(row, topic_id, mapped) + mapped[:topic_id] = topic_id mapped end - def get_usernames(msg_id, author_id) - # Find the users who are part of this private message. - # Found from the to_address of phpbb_privmsgs, by looking at - # all the rows with the same root_msg_id. + def get_recipient_user_ids(to_address) + return [] if to_address.blank? + # to_address looks like this: "u_91:u_1234:u_200" # The "u_" prefix is discarded and the rest is a user_id. - import_user_ids = @database.fetch_message_participants(msg_id, @settings.fix_private_messages) - .map { |r| r[:to_address].split(':') } - .flatten!.uniq.map! { |u| u[2..-1] } + user_ids = to_address.split(':') + user_ids.uniq! + user_ids.map! { |u| u[2..-1].to_i } + end + + def get_recipient_usernames(row) + author_id = row[:author_id].to_s + import_user_ids = get_recipient_user_ids(row[:to_address]) import_user_ids.map! do |import_user_id| - import_user_id.to_s == author_id.to_s ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username) + import_user_id.to_s == author_id ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username) end.compact end - def get_import_id(row) - "pm:#{row[:msg_id]}" + def get_topic_title(row) + CGI.unescapeHTML(row[:message_subject]) + end + + def get_import_id(msg_id) + "pm:#{msg_id}" + end + + # Creates a sorted array consisting of the message's author and recipients. + def sorted_user_ids(author_id, to_address) + user_ids = get_recipient_user_ids(to_address) + user_ids << author_id unless author_id.nil? + user_ids.uniq! + user_ids.sort! + end + + def get_topic_id(row, root_user_ids, current_user_ids) + if row[:root_msg_id] == 0 || root_user_ids != current_user_ids + # Let's try to find an existing Discourse topic_id if this looks like a root message or + # the user IDs of the root message are different from the current message. + find_topic_id(row, current_user_ids) + else + # This appears to be a reply. Let's try to find the Discourse topic_id for this message. + parent_msg_id = get_import_id(row[:root_msg_id]) + parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id) + parent[:topic_id] unless parent.blank? + end + end + + # Tries to find a Discourse topic (private message) that has the same title as the current message. + # The users involved in these messages must match too. + def find_topic_id(row, current_user_ids) + topic_title = get_topic_title(row).downcase + topic_titles = [topic_title] + topic_titles << topic_title[RE_PREFIX.length..-1] if topic_title.start_with?(RE_PREFIX) + + Post.select(:topic_id) + .joins(:topic) + .joins(:_custom_fields) + .where(["LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids", + {titles: topic_titles, user_ids: current_user_ids.join(',')}]) + .order('topics.created_at DESC') + .first.try(:topic_id) end end end diff --git a/script/import_scripts/phpbb3/settings.yml b/script/import_scripts/phpbb3/settings.yml index 5164270c910..8377860e8d0 100644 --- a/script/import_scripts/phpbb3/settings.yml +++ b/script/import_scripts/phpbb3/settings.yml @@ -52,12 +52,6 @@ import: private_messages: true polls: true - # This tries to fix Private Messages that were imported from phpBB2 to phpBB3. - # You should enable this option if you see duplicate messages or lots of related - # messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer' - # should be one topic named 'Importer' and consist of 3 posts). - fix_private_messages: false - # When true: each imported user will have the original username from phpBB as its name # When false: the name of each user will be blank username_as_name: false diff --git a/script/import_scripts/phpbb3/support/settings.rb b/script/import_scripts/phpbb3/support/settings.rb index f7ff71ce9f3..870c162b762 100644 --- a/script/import_scripts/phpbb3/support/settings.rb +++ b/script/import_scripts/phpbb3/support/settings.rb @@ -18,7 +18,6 @@ module ImportScripts::PhpBB3 attr_reader :import_remote_avatars attr_reader :import_gallery_avatars - attr_reader :fix_private_messages attr_reader :use_bbcode_to_md attr_reader :original_site_prefix @@ -45,7 +44,6 @@ module ImportScripts::PhpBB3 @import_remote_avatars = avatar_settings['remote'] @import_gallery_avatars = avatar_settings['gallery'] - @fix_private_messages = import_settings['fix_private_messages'] @use_bbcode_to_md =import_settings['use_bbcode_to_md'] @original_site_prefix = import_settings['site_prefix']['original']