Make sure PMs imported from phpBB3 are only visible to the correct users

In addition, this tries to automatically fix PMs that were migrated
from phpBB2 to phpBB3.
This commit is contained in:
Gerhard Schlager 2016-04-24 16:31:41 +02:00
parent ebd4b45771
commit 2e1cc061d8
No known key found for this signature in database
GPG Key ID: 7DACA3C95B36014B
5 changed files with 105 additions and 195 deletions

View File

@ -161,82 +161,39 @@ module ImportScripts::PhpBB3
SQL SQL
end end
def count_messages(use_fixed_messages) def count_messages
if use_fixed_messages count(<<-SQL)
count(<<-SQL) SELECT COUNT(*) AS count
SELECT COUNT(*) AS count FROM #{@table_prefix}_privmsgs m
FROM #{@table_prefix}_import_privmsgs WHERE NOT EXISTS ( -- ignore duplicate messages
SQL SELECT 1
else FROM #{@table_prefix}_privmsgs x
count(<<-SQL) WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
SELECT COUNT(*) AS count AND x.to_address = m.to_address AND x.message_time = m.message_time
FROM #{@table_prefix}_privmsgs )
SQL SQL
end
end end
def fetch_messages(use_fixed_messages, last_msg_id) def fetch_messages(last_msg_id)
if use_fixed_messages query(<<-SQL, :msg_id)
query(<<-SQL, :msg_id) SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text, m.message_text, m.to_address, r.author_id AS root_author_id, r.to_address AS root_to_address, (
IFNULL(a.attachment_count, 0) AS attachment_count SELECT COUNT(*)
FROM #{@table_prefix}_privmsgs m FROM #{@table_prefix}_attachments a
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id) WHERE a.topic_id = 0 AND m.msg_id = a.post_msg_id
LEFT OUTER JOIN ( ) AS attachment_count
SELECT post_msg_id, COUNT(*) AS attachment_count FROM #{@table_prefix}_privmsgs m
FROM #{@table_prefix}_attachments LEFT OUTER JOIN #{@table_prefix}_privmsgs r ON (m.root_level = r.msg_id)
WHERE topic_id = 0 WHERE m.msg_id > #{last_msg_id}
GROUP BY post_msg_id AND NOT EXISTS ( -- ignore duplicate messages
) a ON (m.msg_id = a.post_msg_id) SELECT 1
WHERE m.msg_id > #{last_msg_id} FROM #{@table_prefix}_privmsgs x
ORDER BY i.root_msg_id, m.msg_id WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
LIMIT #{@batch_size} AND x.to_address = m.to_address AND x.message_time = m.message_time
SQL )
else ORDER BY m.msg_id
query(<<-SQL, :msg_id) LIMIT #{@batch_size}
SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject, SQL
m.message_text, IFNULL(a.attachment_count, 0) AS attachment_count
FROM #{@table_prefix}_privmsgs m
LEFT OUTER JOIN (
SELECT post_msg_id, COUNT(*) AS attachment_count
FROM #{@table_prefix}_attachments
WHERE topic_id = 0
GROUP BY post_msg_id
) a ON (m.msg_id = a.post_msg_id)
WHERE m.msg_id > #{last_msg_id}
ORDER BY m.root_level, m.msg_id
LIMIT #{@batch_size}
SQL
end
end
# Selects the to_address of the given private message and of every message
# that points to it as its root.
#
# msg_id             - ID of the message whose conversation is inspected.
# use_fixed_messages - when truthy, the root is read from the
#                      <prefix>_import_privmsgs table (root_msg_id); otherwise
#                      the root_level column of <prefix>_privmsgs is used.
#
# Returns whatever `query` returns for the generated statement.
def fetch_message_participants(msg_id, use_fixed_messages)
  unless use_fixed_messages
    return query(<<~SQL)
      SELECT m.to_address
      FROM #{@table_prefix}_privmsgs m
      WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id}
    SQL
  end

  query(<<~SQL)
    SELECT m.to_address
    FROM #{@table_prefix}_privmsgs m
    JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
    WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id}
  SQL
end
def calculate_fixed_messages
drop_temp_import_message_table
create_temp_import_message_table
fill_temp_import_message_table
drop_import_message_table
create_import_message_table
fill_import_message_table
drop_temp_import_message_table
end end
def count_bookmarks def count_bookmarks
@ -268,83 +225,5 @@ module ImportScripts::PhpBB3
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path (SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path
SQL SQL
end end
protected
# Drops the temporary helper table <prefix>_import_privmsgs_temp if it exists.
def drop_temp_import_message_table
  temp_table = "#{@table_prefix}_import_privmsgs_temp"
  query("DROP TABLE IF EXISTS #{temp_table}")
end
# Creates the temporary helper table <prefix>_import_privmsgs_temp.
# It holds one row per message (primary key msg_id) with the message's root,
# an optional numeric recipient and a normalized subject.
def create_temp_import_message_table
  create_statement = <<~SQL
    CREATE TABLE #{@table_prefix}_import_privmsgs_temp (
    msg_id MEDIUMINT(8) NOT NULL,
    root_msg_id MEDIUMINT(8) NOT NULL,
    recipient_id MEDIUMINT(8),
    normalized_subject VARCHAR(255) NOT NULL,
    PRIMARY KEY (msg_id)
    )
  SQL

  query(create_statement)
end
# Populates <prefix>_import_privmsgs_temp from <prefix>_privmsgs:
# - duplicate messages are left out (a message is a duplicate when an earlier
#   message has the same root_level, author, to_address and message_time)
# - to_address is converted to a number, but only for root messages with a
#   single recipient (no ':' in to_address); otherwise recipient_id is NULL
# - message_subject is stored in lowercase and without the prefix "Re: "
def fill_temp_import_message_table
  insert_statement = <<~SQL
    INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject)
    SELECT m.msg_id, m.root_level,
    CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN
    CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER)
    ELSE NULL END AS recipient_id,
    LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN
    SUBSTRING(m.message_subject, 5)
    ELSE m.message_subject END) AS normalized_subject
    FROM #{@table_prefix}_privmsgs m
    WHERE NOT EXISTS (
    SELECT 1
    FROM #{@table_prefix}_privmsgs x
    WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
    AND x.to_address = m.to_address AND x.message_time = m.message_time
    )
  SQL

  query(insert_statement)
end
# Drops the helper table <prefix>_import_privmsgs if it exists.
def drop_import_message_table
  import_table = "#{@table_prefix}_import_privmsgs"
  query("DROP TABLE IF EXISTS #{import_table}")
end
# Creates the helper table <prefix>_import_privmsgs, which maps each msg_id
# (primary key) to its root_msg_id; root_msg_id gets its own index.
def create_import_message_table
  create_statement = <<~SQL
    CREATE TABLE #{@table_prefix}_import_privmsgs (
    msg_id MEDIUMINT(8) NOT NULL,
    root_msg_id MEDIUMINT(8) NOT NULL,
    PRIMARY KEY (msg_id),
    INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id)
    )
  SQL

  query(create_statement)
end
# Populates <prefix>_import_privmsgs with the (possibly recalculated) root of
# every message in <prefix>_import_privmsgs_temp.
#
# For messages whose root_msg_id is 0 this tries to calculate the actual root
# (= msg_id of the first message in a private conversation): the earliest
# other message with the same normalized subject, exchanged between the same
# author and recipient (in either direction) and sent before the current
# message. If no such message exists the root stays 0. Messages that already
# have a root keep it.
def fill_import_message_table
  insert_statement = <<~SQL
    INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id)
    SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN
    COALESCE((
    SELECT a.msg_id
    FROM #{@table_prefix}_privmsgs a
    JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id)
    WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR
    (a.author_id = i.recipient_id AND b.recipient_id = m.author_id))
    AND b.normalized_subject = i.normalized_subject
    AND a.msg_id <> m.msg_id
    AND a.message_time < m.message_time
    ORDER BY a.message_time
    LIMIT 1
    ), 0) ELSE i.root_msg_id END AS root_msg_id
    FROM #{@table_prefix}_privmsgs m
    JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id)
  SQL

  query(insert_statement)
end
end end
end end

View File

@ -118,18 +118,13 @@ module ImportScripts::PhpBB3
end end
def import_private_messages def import_private_messages
if @settings.fix_private_messages
puts '', 'fixing private messages'
@database.calculate_fixed_messages
end
puts '', 'creating private messages' puts '', 'creating private messages'
total_count = @database.count_messages(@settings.fix_private_messages) total_count = @database.count_messages
importer = @importers.message_importer importer = @importers.message_importer
last_msg_id = 0 last_msg_id = 0
batches do |offset| batches do |offset|
rows, last_msg_id = @database.fetch_messages(@settings.fix_private_messages, last_msg_id) rows, last_msg_id = @database.fetch_messages(last_msg_id)
break if rows.size < 1 break if rows.size < 1
next if all_records_exist?(:posts, importer.map_to_import_ids(rows)) next if all_records_exist?(:posts, importer.map_to_import_ids(rows))

View File

@ -14,7 +14,7 @@ module ImportScripts::PhpBB3
end end
def map_to_import_ids(rows) def map_to_import_ids(rows)
rows.map { |row| get_import_id(row) } rows.map { |row| get_import_id(row[:msg_id]) }
end end
@ -23,31 +23,38 @@ module ImportScripts::PhpBB3
attachments = import_attachments(row, user_id) attachments = import_attachments(row, user_id)
mapped = { mapped = {
id: get_import_id(row), id: get_import_id(row[:msg_id]),
user_id: user_id, user_id: user_id,
created_at: Time.zone.at(row[:message_time]), created_at: Time.zone.at(row[:message_time]),
raw: @text_processor.process_private_msg(row[:message_text], attachments) raw: @text_processor.process_private_msg(row[:message_text], attachments)
} }
if row[:root_msg_id] == 0 root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address])
map_first_message(row, mapped) current_user_ids = sorted_user_ids(row[:author_id], row[:to_address])
topic_id = get_topic_id(row, root_user_ids, current_user_ids)
if topic_id.blank?
map_first_message(row, current_user_ids, mapped)
else else
map_other_message(row, mapped) map_other_message(row, topic_id, mapped)
end end
end end
protected protected
RE_PREFIX = 're: '
def import_attachments(row, user_id) def import_attachments(row, user_id)
if @settings.import_attachments && row[:attachment_count] > 0 if @settings.import_attachments && row[:attachment_count] > 0
@attachment_importer.import_attachments(user_id, row[:msg_id]) @attachment_importer.import_attachments(user_id, row[:msg_id])
end end
end end
def map_first_message(row, mapped) def map_first_message(row, current_user_ids, mapped)
mapped[:title] = CGI.unescapeHTML(row[:message_subject]) mapped[:title] = get_topic_title(row)
mapped[:archetype] = Archetype.private_message mapped[:archetype] = Archetype.private_message
mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id]) mapped[:target_usernames] = get_recipient_usernames(row)
mapped[:custom_fields] = {import_user_ids: current_user_ids.join(',')}
if mapped[:target_usernames].empty? # pm with yourself? if mapped[:target_usernames].empty? # pm with yourself?
puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}" puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
@ -57,36 +64,73 @@ module ImportScripts::PhpBB3
mapped mapped
end end
def map_other_message(row, mapped) def map_other_message(row, topic_id, mapped)
parent_msg_id = "pm:#{row[:root_msg_id]}" mapped[:topic_id] = topic_id
parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)
if parent.blank?
puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
return nil
end
mapped[:topic_id] = parent[:topic_id]
mapped mapped
end end
def get_usernames(msg_id, author_id) def get_recipient_user_ids(to_address)
# Find the users who are part of this private message. return [] if to_address.blank?
# Found from the to_address of phpbb_privmsgs, by looking at
# all the rows with the same root_msg_id.
# to_address looks like this: "u_91:u_1234:u_200" # to_address looks like this: "u_91:u_1234:u_200"
# The "u_" prefix is discarded and the rest is a user_id. # The "u_" prefix is discarded and the rest is a user_id.
import_user_ids = @database.fetch_message_participants(msg_id, @settings.fix_private_messages) user_ids = to_address.split(':')
.map { |r| r[:to_address].split(':') } user_ids.uniq!
.flatten!.uniq.map! { |u| u[2..-1] } user_ids.map! { |u| u[2..-1].to_i }
end
def get_recipient_usernames(row)
author_id = row[:author_id].to_s
import_user_ids = get_recipient_user_ids(row[:to_address])
import_user_ids.map! do |import_user_id| import_user_ids.map! do |import_user_id|
import_user_id.to_s == author_id.to_s ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username) import_user_id.to_s == author_id ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username)
end.compact end.compact
end end
def get_import_id(row) def get_topic_title(row)
"pm:#{row[:msg_id]}" CGI.unescapeHTML(row[:message_subject])
end
def get_import_id(msg_id)
"pm:#{msg_id}"
end
# Builds the sorted, duplicate-free list of everybody involved in a message:
# the recipients parsed from +to_address+ plus the author.
#
# author_id  - ID of the message's author; left out when nil.
# to_address - recipient string handed to get_recipient_user_ids.
#
# Returns the array obtained from get_recipient_user_ids, modified in place.
def sorted_user_ids(author_id, to_address)
  participant_ids = get_recipient_user_ids(to_address)
  participant_ids.push(author_id) unless author_id.nil?
  # replace keeps the same array object, just like the in-place uniq!/sort! pair would
  participant_ids.replace(participant_ids.uniq.sort)
end
# Determines the Discourse topic_id the given message belongs to.
#
# row              - message row; :root_msg_id is read here.
# root_user_ids    - sorted user IDs of the root message.
# current_user_ids - sorted user IDs of the current message.
#
# Returns the topic_id, or nil when no parent topic was found by the lookup
# (the title-based search returns whatever find_topic_id returns).
def get_topic_id(row, root_user_ids, current_user_ids)
  looks_like_reply = row[:root_msg_id] != 0 && root_user_ids == current_user_ids

  # Either this looks like a root message or the users of the root message differ from
  # the users of the current message: search for an existing topic instead.
  return find_topic_id(row, current_user_ids) unless looks_like_reply

  # A reply: resolve the topic through the imported root message.
  root_import_id = get_import_id(row[:root_msg_id])
  parent = @lookup.topic_lookup_from_imported_post_id(root_import_id)
  parent.blank? ? nil : parent[:topic_id]
end
# Looks for an already imported Discourse topic (private message) that matches the current
# message: its lowercased title must equal the message's lowercased title (with or without
# one leading "re: " prefix) and the post's 'import_user_ids' custom field must equal the
# comma-joined +current_user_ids+.
#
# Returns the topic_id of the most recently created matching topic, or nil if there is none.
def find_topic_id(row, current_user_ids)
  title = get_topic_title(row).downcase
  candidate_titles = title.start_with?(RE_PREFIX) ? [title, title[RE_PREFIX.length..-1]] : [title]

  conditions = "LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids"
  bindings = {titles: candidate_titles, user_ids: current_user_ids.join(',')}

  newest_match = Post.select(:topic_id)
                     .joins(:topic)
                     .joins(:_custom_fields)
                     .where([conditions, bindings])
                     .order('topics.created_at DESC')
                     .first
  newest_match.try(:topic_id)
end
end end
end end

View File

@ -52,12 +52,6 @@ import:
private_messages: true private_messages: true
polls: true polls: true
# This tries to fix Private Messages that were imported from phpBB2 to phpBB3.
# You should enable this option if you see duplicate messages or lots of related
# messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer'
# should be one topic named 'Importer' and consist of 3 posts).
fix_private_messages: false
# When true: each imported user will have the original username from phpBB as its name # When true: each imported user will have the original username from phpBB as its name
# When false: the name of each user will be blank # When false: the name of each user will be blank
username_as_name: false username_as_name: false

View File

@ -18,7 +18,6 @@ module ImportScripts::PhpBB3
attr_reader :import_remote_avatars attr_reader :import_remote_avatars
attr_reader :import_gallery_avatars attr_reader :import_gallery_avatars
attr_reader :fix_private_messages
attr_reader :use_bbcode_to_md attr_reader :use_bbcode_to_md
attr_reader :original_site_prefix attr_reader :original_site_prefix
@ -45,7 +44,6 @@ module ImportScripts::PhpBB3
@import_remote_avatars = avatar_settings['remote'] @import_remote_avatars = avatar_settings['remote']
@import_gallery_avatars = avatar_settings['gallery'] @import_gallery_avatars = avatar_settings['gallery']
@fix_private_messages = import_settings['fix_private_messages']
@use_bbcode_to_md =import_settings['use_bbcode_to_md'] @use_bbcode_to_md =import_settings['use_bbcode_to_md']
@original_site_prefix = import_settings['site_prefix']['original'] @original_site_prefix = import_settings['site_prefix']['original']