Make sure PMs imported from phpBB3 are only visible to the correct users

In addition, this tries to automatically fix PMs that were migrated
from phpBB2 to phpBB3.
This commit is contained in:
Gerhard Schlager 2016-04-24 16:31:41 +02:00
parent ebd4b45771
commit 2e1cc061d8
No known key found for this signature in database
GPG Key ID: 7DACA3C95B36014B
5 changed files with 105 additions and 195 deletions

View File

@ -161,82 +161,39 @@ module ImportScripts::PhpBB3
SQL
end
def count_messages(use_fixed_messages)
if use_fixed_messages
count(<<-SQL)
SELECT COUNT(*) AS count
FROM #{@table_prefix}_import_privmsgs
SQL
else
count(<<-SQL)
SELECT COUNT(*) AS count
FROM #{@table_prefix}_privmsgs
SQL
end
# Counts the private messages that are eligible for import.
#
# Runs a COUNT(*) over the `<table_prefix>_privmsgs` table. A row is left out
# when an earlier row (lower msg_id) exists with the same root_level,
# author_id, to_address and message_time; such rows are treated as duplicates.
#
# Returns whatever the `count` helper returns for this query.
def count_messages
count(<<-SQL)
SELECT COUNT(*) AS count
FROM #{@table_prefix}_privmsgs m
WHERE NOT EXISTS ( -- ignore duplicate messages
SELECT 1
FROM #{@table_prefix}_privmsgs x
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
AND x.to_address = m.to_address AND x.message_time = m.message_time
)
SQL
end
# Fetches one batch of private messages whose msg_id is greater than
# last_msg_id, together with the number of attachments of each message.
#
# use_fixed_messages - when truthy, root_msg_id is read from the
#                      `<table_prefix>_import_privmsgs` table (joined on
#                      msg_id); otherwise the message's own root_level column
#                      is returned as root_msg_id.
# last_msg_id        - only rows with a higher msg_id are selected.
#
# attachment_count is 0 for messages without rows in the attachments table
# (only attachments with topic_id = 0 are counted). At most @batch_size rows
# are selected per call.
#
# NOTE(review): :msg_id is passed through to `query`; presumably it names the
# column used to report the last processed ID -- confirm against `query`.
# NOTE(review): rows are ordered by the root message ID first while paging is
# keyed on msg_id alone -- verify that consecutive batches cannot skip rows.
def fetch_messages(use_fixed_messages, last_msg_id)
if use_fixed_messages
# Variant that relies on the pre-computed root_msg_id values.
query(<<-SQL, :msg_id)
SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text,
IFNULL(a.attachment_count, 0) AS attachment_count
FROM #{@table_prefix}_privmsgs m
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
LEFT OUTER JOIN (
SELECT post_msg_id, COUNT(*) AS attachment_count
FROM #{@table_prefix}_attachments
WHERE topic_id = 0
GROUP BY post_msg_id
) a ON (m.msg_id = a.post_msg_id)
WHERE m.msg_id > #{last_msg_id}
ORDER BY i.root_msg_id, m.msg_id
LIMIT #{@batch_size}
SQL
else
# Variant that trusts the root_level stored with each message.
query(<<-SQL, :msg_id)
SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
m.message_text, IFNULL(a.attachment_count, 0) AS attachment_count
FROM #{@table_prefix}_privmsgs m
LEFT OUTER JOIN (
SELECT post_msg_id, COUNT(*) AS attachment_count
FROM #{@table_prefix}_attachments
WHERE topic_id = 0
GROUP BY post_msg_id
) a ON (m.msg_id = a.post_msg_id)
WHERE m.msg_id > #{last_msg_id}
ORDER BY m.root_level, m.msg_id
LIMIT #{@batch_size}
SQL
end
end
# Fetches the to_address value of a message and of every message that names
# it as its root.
#
# msg_id             - ID of the message that starts the conversation.
# use_fixed_messages - when truthy, the root relation is taken from the
#                      `<table_prefix>_import_privmsgs` table (root_msg_id);
#                      otherwise from the root_level column of the
#                      `<table_prefix>_privmsgs` table.
#
# Returns the result of `query`; every row carries a single to_address column.
def fetch_message_participants(msg_id, use_fixed_messages)
if use_fixed_messages
query(<<-SQL)
SELECT m.to_address
FROM #{@table_prefix}_privmsgs m
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id}
SQL
else
query(<<-SQL)
SELECT m.to_address
FROM #{@table_prefix}_privmsgs m
WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id}
SQL
end
end
def calculate_fixed_messages
drop_temp_import_message_table
create_temp_import_message_table
fill_temp_import_message_table
drop_import_message_table
create_import_message_table
fill_import_message_table
drop_temp_import_message_table
# Fetches one batch of private messages whose msg_id is greater than
# last_msg_id, ordered by msg_id and limited to @batch_size rows.
#
# Every row contains the message itself (msg_id, root_level as root_msg_id,
# author_id, message_time, message_subject, message_text, to_address) plus:
#   root_author_id / root_to_address - author and recipients of the message
#     whose msg_id equals this message's root_level; NULL when there is no
#     such message, because the self-join is a LEFT OUTER JOIN.
#   attachment_count - number of attachment rows with topic_id = 0 that point
#     at this message.
#
# A row is skipped when an earlier row (lower msg_id) has the same
# root_level, author_id, to_address and message_time.
#
# NOTE(review): :msg_id is passed through to `query`; presumably it names the
# column used to report the last processed ID -- confirm against `query`.
def fetch_messages(last_msg_id)
query(<<-SQL, :msg_id)
SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
m.message_text, m.to_address, r.author_id AS root_author_id, r.to_address AS root_to_address, (
SELECT COUNT(*)
FROM #{@table_prefix}_attachments a
WHERE a.topic_id = 0 AND m.msg_id = a.post_msg_id
) AS attachment_count
FROM #{@table_prefix}_privmsgs m
LEFT OUTER JOIN #{@table_prefix}_privmsgs r ON (m.root_level = r.msg_id)
WHERE m.msg_id > #{last_msg_id}
AND NOT EXISTS ( -- ignore duplicate messages
SELECT 1
FROM #{@table_prefix}_privmsgs x
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
AND x.to_address = m.to_address AND x.message_time = m.message_time
)
ORDER BY m.msg_id
LIMIT #{@batch_size}
SQL
end
def count_bookmarks
@ -268,83 +225,5 @@ module ImportScripts::PhpBB3
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path
SQL
end
protected
# Drops the temporary helper table `<table_prefix>_import_privmsgs_temp`
# if it exists.
def drop_temp_import_message_table
  temp_table = "#{@table_prefix}_import_privmsgs_temp"
  query("DROP TABLE IF EXISTS #{temp_table}")
end
# Creates the temporary helper table `<table_prefix>_import_privmsgs_temp`.
#
# Columns: msg_id (primary key), root_msg_id, recipient_id (nullable) and
# normalized_subject.
def create_temp_import_message_table
query(<<-SQL)
CREATE TABLE #{@table_prefix}_import_privmsgs_temp (
msg_id MEDIUMINT(8) NOT NULL,
root_msg_id MEDIUMINT(8) NOT NULL,
recipient_id MEDIUMINT(8),
normalized_subject VARCHAR(255) NOT NULL,
PRIMARY KEY (msg_id)
)
SQL
end
# Fills the temporary helper table from the `<table_prefix>_privmsgs` table.
#
# For every message that is not a duplicate this stores:
#   root_msg_id        - the message's root_level, unchanged.
#   recipient_id       - the numeric part of to_address (everything from the
#                        third character on), but only when root_level is 0
#                        and to_address contains no ':' (i.e. a single
#                        address); NULL in every other case.
#   normalized_subject - message_subject in lowercase, with a leading "Re: "
#                        removed.
#
# A message counts as a duplicate when an earlier row (lower msg_id) has the
# same root_level, author_id, to_address and message_time.
def fill_temp_import_message_table
query(<<-SQL)
INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject)
SELECT m.msg_id, m.root_level,
CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN
CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER)
ELSE NULL END AS recipient_id,
LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN
SUBSTRING(m.message_subject, 5)
ELSE m.message_subject END) AS normalized_subject
FROM #{@table_prefix}_privmsgs m
WHERE NOT EXISTS (
SELECT 1
FROM #{@table_prefix}_privmsgs x
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
AND x.to_address = m.to_address AND x.message_time = m.message_time
)
SQL
end
# Drops the helper table `<table_prefix>_import_privmsgs` if it exists.
def drop_import_message_table
  import_table = "#{@table_prefix}_import_privmsgs"
  query("DROP TABLE IF EXISTS #{import_table}")
end
# Creates the helper table `<table_prefix>_import_privmsgs`.
#
# Columns: msg_id (primary key) and root_msg_id; root_msg_id gets its own
# index.
def create_import_message_table
query(<<-SQL)
CREATE TABLE #{@table_prefix}_import_privmsgs (
msg_id MEDIUMINT(8) NOT NULL,
root_msg_id MEDIUMINT(8) NOT NULL,
PRIMARY KEY (msg_id),
INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id)
)
SQL
end
# Fills the helper table `<table_prefix>_import_privmsgs` with a computed
# root_msg_id (= msg_id of the first message in a private conversation) for
# every message found in the temporary helper table.
#
# Messages whose stored root_msg_id is not 0 keep that value. For messages
# with root_msg_id = 0 the query looks for the oldest other message that
#   * was sent earlier (lower message_time),
#   * has the same normalized subject, and
#   * involves the same author/recipient pair, in either direction,
# and uses its msg_id as root. When no such message exists the value stays 0.
def fill_import_message_table
query(<<-SQL)
INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id)
SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN
COALESCE((
SELECT a.msg_id
FROM #{@table_prefix}_privmsgs a
JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id)
WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR
(a.author_id = i.recipient_id AND b.recipient_id = m.author_id))
AND b.normalized_subject = i.normalized_subject
AND a.msg_id <> m.msg_id
AND a.message_time < m.message_time
ORDER BY a.message_time
LIMIT 1
), 0) ELSE i.root_msg_id END AS root_msg_id
FROM #{@table_prefix}_privmsgs m
JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id)
SQL
end
end
end

View File

@ -118,18 +118,13 @@ module ImportScripts::PhpBB3
end
def import_private_messages
if @settings.fix_private_messages
puts '', 'fixing private messages'
@database.calculate_fixed_messages
end
puts '', 'creating private messages'
total_count = @database.count_messages(@settings.fix_private_messages)
total_count = @database.count_messages
importer = @importers.message_importer
last_msg_id = 0
batches do |offset|
rows, last_msg_id = @database.fetch_messages(@settings.fix_private_messages, last_msg_id)
rows, last_msg_id = @database.fetch_messages(last_msg_id)
break if rows.size < 1
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))

View File

@ -14,7 +14,7 @@ module ImportScripts::PhpBB3
end
def map_to_import_ids(rows)
rows.map { |row| get_import_id(row) }
rows.map { |row| get_import_id(row[:msg_id]) }
end
@ -23,31 +23,38 @@ module ImportScripts::PhpBB3
attachments = import_attachments(row, user_id)
mapped = {
id: get_import_id(row),
id: get_import_id(row[:msg_id]),
user_id: user_id,
created_at: Time.zone.at(row[:message_time]),
raw: @text_processor.process_private_msg(row[:message_text], attachments)
}
if row[:root_msg_id] == 0
map_first_message(row, mapped)
root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address])
current_user_ids = sorted_user_ids(row[:author_id], row[:to_address])
topic_id = get_topic_id(row, root_user_ids, current_user_ids)
if topic_id.blank?
map_first_message(row, current_user_ids, mapped)
else
map_other_message(row, mapped)
map_other_message(row, topic_id, mapped)
end
end
protected
RE_PREFIX = 're: '
# Imports the attachments of a private message.
#
# row     - message row; :attachment_count and :msg_id are read.
# user_id - passed on to the attachment importer.
#
# Returns the attachment importer's result, or nil when attachment import is
# disabled in the settings or the message has no attachments.
def import_attachments(row, user_id)
  wanted = @settings.import_attachments && row[:attachment_count] > 0
  return unless wanted

  @attachment_importer.import_attachments(user_id, row[:msg_id])
end
def map_first_message(row, mapped)
mapped[:title] = CGI.unescapeHTML(row[:message_subject])
def map_first_message(row, current_user_ids, mapped)
mapped[:title] = get_topic_title(row)
mapped[:archetype] = Archetype.private_message
mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id])
mapped[:target_usernames] = get_recipient_usernames(row)
mapped[:custom_fields] = {import_user_ids: current_user_ids.join(',')}
if mapped[:target_usernames].empty? # pm with yourself?
puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
@ -57,36 +64,73 @@ module ImportScripts::PhpBB3
mapped
end
def map_other_message(row, mapped)
parent_msg_id = "pm:#{row[:root_msg_id]}"
parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)
if parent.blank?
puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
return nil
end
mapped[:topic_id] = parent[:topic_id]
def map_other_message(row, topic_id, mapped)
mapped[:topic_id] = topic_id
mapped
end
def get_usernames(msg_id, author_id)
# Find the users who are part of this private message.
# Found from the to_address of phpbb_privmsgs, by looking at
# all the rows with the same root_msg_id.
def get_recipient_user_ids(to_address)
return [] if to_address.blank?
# to_address looks like this: "u_91:u_1234:u_200"
# The "u_" prefix is discarded and the rest is a user_id.
import_user_ids = @database.fetch_message_participants(msg_id, @settings.fix_private_messages)
.map { |r| r[:to_address].split(':') }
.flatten!.uniq.map! { |u| u[2..-1] }
user_ids = to_address.split(':')
user_ids.uniq!
user_ids.map! { |u| u[2..-1].to_i }
end
def get_recipient_usernames(row)
author_id = row[:author_id].to_s
import_user_ids = get_recipient_user_ids(row[:to_address])
import_user_ids.map! do |import_user_id|
import_user_id.to_s == author_id.to_s ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username)
import_user_id.to_s == author_id ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username)
end.compact
end
def get_import_id(row)
"pm:#{row[:msg_id]}"
# Returns the message subject of the row with HTML entities decoded.
def get_topic_title(row)
  subject = row[:message_subject]
  CGI.unescapeHTML(subject)
end
# Builds the import ID of a private message: the msg_id prefixed with "pm:".
def get_import_id(msg_id)
  'pm:' + msg_id.to_s
end
# Builds the sorted list of distinct user IDs taking part in a message:
# the recipients parsed from to_address plus the author (unless it is nil).
def sorted_user_ids(author_id, to_address)
  participant_ids = get_recipient_user_ids(to_address)
  participant_ids += [author_id] unless author_id.nil?
  participant_ids.uniq.sort
end
# Determines the Discourse topic_id the message should be added to.
#
# row              - message row; :root_msg_id is read.
# root_user_ids    - sorted user IDs of the root message.
# current_user_ids - sorted user IDs of the current message.
#
# Returns a topic_id, or nil when no matching topic could be found.
def get_topic_id(row, root_user_ids, current_user_ids)
  looks_like_root = row[:root_msg_id] == 0
  participants_differ = root_user_ids != current_user_ids

  # A root message, or a message whose participants differ from those of its
  # root: search for an existing Discourse topic instead.
  return find_topic_id(row, current_user_ids) if looks_like_root || participants_differ

  # Otherwise this appears to be a reply: use the topic of the imported root message.
  root_import_id = get_import_id(row[:root_msg_id])
  root_post = @lookup.topic_lookup_from_imported_post_id(root_import_id)
  root_post.blank? ? nil : root_post[:topic_id]
end
# Looks for an already imported Discourse topic (private message) that belongs
# to the same conversation as the given message.
#
# A topic matches when its lowercased title equals the lowercased message
# title (with or without a leading "re: ") and a post in it carries an
# 'import_user_ids' custom field equal to the comma-joined current_user_ids.
#
# Returns the topic_id of the most recently created match, or nil.
def find_topic_id(row, current_user_ids)
  lowercase_title = get_topic_title(row).downcase

  candidate_titles =
    if lowercase_title.start_with?(RE_PREFIX)
      [lowercase_title, lowercase_title[RE_PREFIX.length..-1]]
    else
      [lowercase_title]
    end

  conditions = "LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids"
  bindings = {titles: candidate_titles, user_ids: current_user_ids.join(',')}

  newest_match = Post.select(:topic_id)
    .joins(:topic)
    .joins(:_custom_fields)
    .where([conditions, bindings])
    .order('topics.created_at DESC')
    .first

  newest_match.try(:topic_id)
end
end
end

View File

@ -52,12 +52,6 @@ import:
private_messages: true
polls: true
# This tries to fix Private Messages that were imported from phpBB2 to phpBB3.
# You should enable this option if you see duplicate messages or lots of related
# messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer'
# should be one topic named 'Importer' and consist of 3 posts).
fix_private_messages: false
# When true: each imported user will have the original username from phpBB as its name
# When false: the name of each user will be blank
username_as_name: false

View File

@ -18,7 +18,6 @@ module ImportScripts::PhpBB3
attr_reader :import_remote_avatars
attr_reader :import_gallery_avatars
attr_reader :fix_private_messages
attr_reader :use_bbcode_to_md
attr_reader :original_site_prefix
@ -45,7 +44,6 @@ module ImportScripts::PhpBB3
@import_remote_avatars = avatar_settings['remote']
@import_gallery_avatars = avatar_settings['gallery']
@fix_private_messages = import_settings['fix_private_messages']
@use_bbcode_to_md =import_settings['use_bbcode_to_md']
@original_site_prefix = import_settings['site_prefix']['original']