From 2b2584912a380813b2ab6b2601a57317cfa43203 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Fri, 3 Apr 2020 18:10:52 +0200 Subject: [PATCH] Improve Telligent import script * Imports private messages * Replaces internal links for topics and replies * Allows incremental import of accepted answers --- script/import_scripts/telligent.rb | 164 +++++++++++++++++++++++++++-- 1 file changed, 158 insertions(+), 6 deletions(-) diff --git a/script/import_scripts/telligent.rb b/script/import_scripts/telligent.rb index d9b364fb155..a6b1218e41d 100644 --- a/script/import_scripts/telligent.rb +++ b/script/import_scripts/telligent.rb @@ -50,6 +50,7 @@ class ImportScripts::Telligent < ImportScripts::Base /\[(?img|url)=[^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?[^\/]+)\/(?[^\/]+)\/(?.+?)\][^\[]*?\[\/\k\]/i ] PROPERTY_NAMES_REGEX ||= /(?\w+):S:(?\d+):(?\d+):/ + INTERNAL_LINK_REGEX ||= /\shref=".*?\/f\/\d+(?:(\/t\/(?\d+))|(?:\/p\/\d+\/(?\d+))|(?:\/p\/(?\d+)\/reply))\.aspx[^"]*?"/i CATEGORY_LINK_NORMALIZATION = '/.*?(f\/\d+)$/\1' TOPIC_LINK_NORMALIZATION = '/.*?(f\/\d+\/t\/\d+)$/\1' @@ -109,6 +110,7 @@ class ImportScripts::Telligent < ImportScripts::Base import_users import_topics import_posts + import_messages mark_topics_as_solved end @@ -127,7 +129,12 @@ class ImportScripts::Telligent < ImportScripts::Base WHERE t.UserId = u.UserID) OR EXISTS(SELECT 1 FROM te_Forum_ThreadReplies r - WHERE r.UserId = u.UserID) + WHERE r.UserId = u.UserID) OR + EXISTS(SELECT 1 + FROM cs_Messaging_ConversationParticipants p + JOIN cs_Messaging_ConversationMessages cm ON p.ConversationId = cm.ConversationId + JOIN cs_Messaging_Messages m ON m.MessageId = cm.MessageId + WHERE p.ParticipantId = u.UserID) ) SQL @@ -508,6 +515,116 @@ class ImportScripts::Telligent < ImportScripts::Base end end + def import_messages + puts "", "Importing messages..." + + current_conversation_id = "" + current_topic_import_id = "" + + last_conversation_id = "" + + total_count = count(<<~SQL) + SELECT COUNT(1) AS count + FROM cs_Messaging_Messages m + JOIN cs_Messaging_ConversationMessages cm ON m.MessageId = cm.MessageId + SQL + + batches do |offset| + if last_conversation_id.blank? + conditions = "" + else + conditions = <<~SQL + WHERE cm.ConversationId > '#{last_conversation_id}' + SQL + end + + rows = query(<<~SQL) + SELECT TOP #{BATCH_SIZE} + cm.ConversationId, m.MessageId, m.AuthorId, m.Subject, m.Body, m.DateCreated, + STUFF((SELECT ';' + CONVERT(VARCHAR, p.ParticipantId) + FROM cs_Messaging_ConversationParticipants p + WHERE p.ConversationId = cm.ConversationId + ORDER BY p.ParticipantId + FOR XML PATH('')), 1, 1, '') AS ParticipantIds + FROM cs_Messaging_Messages m + JOIN cs_Messaging_ConversationMessages cm ON m.MessageId = cm.MessageId + #{conditions} + ORDER BY cm.ConversationId, m.DateCreated, m.MessageId + SQL + + break if rows.blank? + + last_row = rows[-1] + last_conversation_id = last_row["ConversationId"] + next if all_records_exist?(:post, rows.map { |row| row["MessageId"] }) + + create_posts(rows, total: total_count, offset: offset) do |row| + user_id = user_id_from_imported_user_id(row["AuthorId"]) || Discourse::SYSTEM_USER_ID + + post = { + id: row["MessageId"], + raw: raw_with_attachment(row, user_id, :message), + user_id: user_id, + created_at: row["DateCreated"] + } + + if current_conversation_id == row["ConversationId"] + parent_post = topic_lookup_from_imported_post_id(current_topic_import_id) + + if parent_post + post[:topic_id] = parent_post[:topic_id] + else + puts "Failed to import message #{row['MessageId']}. Parent was not found." + post = nil + end + else + post[:title] = CGI.unescapeHTML(row["Subject"]) + post[:archetype] = Archetype.private_message + post[:target_usernames] = get_recipient_usernames(row) + + if post[:target_usernames].empty? + puts "Private message without recipients. Skipping #{row['MessageId']}" + post = nil + end + + current_topic_import_id = row["MessageId"] + end + + current_conversation_id = row["ConversationId"] + post + end + end + + # Mark all imported messages as read + DB.exec(<<~SQL) + UPDATE topic_users tu + SET last_read_post_number = t.highest_post_number, + highest_seen_post_number = t.highest_post_number + FROM topics t + JOIN topic_custom_fields tcf ON t.id = tcf.topic_id + WHERE tu.topic_id = t.id + AND tu.user_id > 0 + AND t.archetype = 'private_message' + AND tcf.name = 'import_id' + SQL + end + + def get_recipient_user_ids(participant_ids) + return [] if participant_ids.blank? + + user_ids = participant_ids.split(';') + user_ids.uniq! + user_ids.map!(&:strip) + end + + def get_recipient_usernames(row) + import_user_ids = get_recipient_user_ids(row["ParticipantIds"]) + + import_user_ids.map! do |import_user_id| + find_user_by_import_id(import_user_id).try(:username) + end.compact + end + def index_directory(root_directory) Dir.foreach(root_directory) do |directory_name| next if directory_name == "." || directory_name == ".." @@ -524,7 +641,7 @@ class ImportScripts::Telligent < ImportScripts::Base end def raw_with_attachment(row, user_id, type) - raw, embedded_paths, upload_ids = replace_embedded_attachments(row["Body"], user_id) + raw, embedded_paths, upload_ids = replace_embedded_attachments(row, user_id, type) raw = html_to_markdown(raw) || "" filename = row["FileName"] @@ -551,15 +668,28 @@ class ImportScripts::Telligent < ImportScripts::Base raw = "#{raw}\n#{@uploader.html_for_upload(upload, filename)}" end else - id = type == :topic ? row['ThreadId'] : row['ThreadReplyId'] - STDERR.puts "Could not find file for #{type} #{id}: #{path}" + print_file_not_found_error(type, path, row) end end raw end - def replace_embedded_attachments(raw, user_id) + def print_file_not_found_error(type, path, row) + case type + when :topic + id = row['ThreadId'] + when :post + id = row['ThreadReplyId'] + when :message + id = row['MessageId'] + end + + STDERR.puts "Could not find file for #{type} #{id}: #{path}" + end + + def replace_embedded_attachments(row, user_id, type) + raw = row["Body"] paths = [] upload_ids = [] @@ -583,7 +713,7 @@ class ImportScripts::Telligent < ImportScripts::Base end else path = File.join(path, match_data[:filename]) - STDERR.puts "Could not find file: #{path}" + print_file_not_found_error(type, path, row) match_data[0] end end @@ -673,6 +803,8 @@ class ImportScripts::Telligent < ImportScripts::Base def html_to_markdown(html) return html if html.blank? + html = fix_internal_links(html) + md = HtmlToMarkdown.new(html).to_markdown md.gsub!(/\[quote.*?\]/, "\n" + '\0' + "\n") md.gsub!(/(?