# frozen_string_literal: true

# Yammer importer
# https://docs.microsoft.com/en-us/yammer/manage-security-and-compliance/export-yammer-enterprise-data#export-yammer-network-data-by-date-range-and-network
#
# You will need a bunch of CSV files:
#
# - Users.csv Groups.csv Topics.csv Files.csv Messages.csv
#   (Others included in the Yammer export are ignored)

require "csv"
require_relative "base"
require_relative "base/generic_database"

# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/yammer.rb DIRNAME
class ImportScripts::Yammer < ImportScripts::Base
  BATCH_SIZE = 1000
  NUM_WORDS_IN_TITLE = (ENV["NUM_WORDS_IN_TITLE"] || 20).to_i
  SKIP_EMPTY_EMAIL = true
  SKIP_INACTIVE_USERS = false
  PARENT_CATEGORY_NAME = ENV["PARENT_CATEGORY_NAME"] || "Yammer Import"
  IMPORT_GROUPS_AS_TAGS = true
  MERGE_USERS = true

  # import groups as tags rather than as categories
  SiteSetting.tagging_enabled = true if IMPORT_GROUPS_AS_TAGS

  PM_TAG = ENV["PM_TAG"] || "eht"

  def initialize(path)
    super()
    @path = path
    @db = ImportScripts::GenericDatabase.new(@path, batch_size: BATCH_SIZE, recreate: true)
  end

  def execute
    create_developer_users
    read_csv_files

    import_categories
    import_users
    import_topics
    import_pm_topics
    import_posts
    import_pm_posts
  end

  def create_developer_users
    GlobalSetting
      .developer_emails
      .split(",")
      .each { |e| User.create(email: e, active: true, username: e.split("@")[0]) }
  end

  def read_csv_files
    puts "", "reading CSV files"

    # consider csv_parse Tags.csv
    # consider Admins.csv that has admins

    u_count = 0
    csv_parse("Users") do |row|
      next if SKIP_INACTIVE_USERS && row[:state] != "active"
      u_count += 1
      @db.insert_user(
        id: row[:id],
        email: row[:email],
        name: row[:name],
        username: row[:name],
        bio: "",
        # job_title: row[:job_title],
        # location: row[:location],
        # department: row[:department],
        created_at: parse_datetime(row[:joined_at]),
        # deleted_at: parse_datetime(row[:deleted_at]),
        # suspended_at: parse_datetime(row[:suspended_at]),
        # guid: row[:guid],
        # state: row[:state],
        avatar_path: row[:user_cover_image_url],
        # last_seen_at: ,
        active: row[:state] == "active" ? 1 : 0,
      )
    end

    category_position = 0
    csv_parse("Groups") do |row|
      @db.insert_category(
        id: row[:id],
        name: row[:name],
        description: row[:description],
        position: category_position += 1,
      )
    end

    csv_parse("Files") do |row|
      @db.insert_upload(
        id: row[:file_id],
        user_id: row[:uploader_id],
        original_filename: row[:name],
        filename: row[:path],
        description: row[:description],
      )
    end

    # get topics from messages
    csv_parse("Messages") do |row|
      next unless row[:thread_id] == row[:id]
      next if row[:in_private_conversation] == "true"
      next unless row[:deleted_at].blank?
      # next if row[:message_type] == 'system'

      title = ""
      url = ""
      description = ""
      raw = row[:body]

      # Link attachments exported by Yammer look roughly like
      # opengraphobject:[<id> : <url> : title="..." : description="..."]
      # (shape inferred from this regex)
      reg = /opengraphobject:\[(\d*?) : (.*?) : title="(.*?)" : description="(.*?)"\]/

      if row[:attachments]
        row[:attachments].match(reg) do
          url = Regexp.last_match(2)
          title = Regexp.last_match(3) if Regexp.last_match(3)
          description = Regexp.last_match(4)
          raw += "\n***\n#{description}\n#{url}\n" unless raw.include?(url)
        end

        row[:attachments].match(/uploadedfile:(\d*)$/) do
          file_id = Regexp.last_match(1).to_i
          up = @db.fetch_upload(file_id).first
          path = File.join(@path, up["filename"])
          filename = up["original_filename"]
          user_id = user_id_from_imported_user_id(row[:sender_id]) || Discourse.system_user.id

          if File.exist?(path)
            upload = create_upload(user_id, path, filename)
            raw += html_for_upload(upload, filename) if upload&.persisted?
          end
        end
      end

      @db.insert_topic(
        id: row[:id],
        title: title,
        raw: raw,
        category_id: row[:group_id],
        closed: row[:closed] == "TRUE" ? 1 : 0,
        user_id: row[:sender_id],
        created_at: parse_datetime(row[:created_at]),
      )
    end
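
    # The pass below mirrors the topic pass above, but keeps only private
    # conversations (in_private_conversation == "true") and records the
    # participant list so they can be recreated as personal messages.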
    # get pm topics
    csv_parse("Messages") do |row|
      next unless row[:thread_id] == row[:id]
      next unless row[:in_private_conversation] == "true"
      next unless row[:deleted_at].blank?
      # next if row[:message_type] == 'system'

      title = ""
      url = ""
      description = ""
      raw = row[:body]

      reg = /opengraphobject:\[(\d*?) : (.*?) : title="(.*?)" : description="(.*?)"\]/

      if row[:attachments]
        row[:attachments].match(reg) do
          url = Regexp.last_match(2)
          title = Regexp.last_match(3) if Regexp.last_match(3)
          description = Regexp.last_match(4)
          raw += "\n***\n#{description}\n#{url}\n" unless raw.include?(url)
        end

        row[:attachments].match(/uploadedfile:(\d*)$/) do
          file_id = Regexp.last_match(1).to_i
          up = @db.fetch_upload(file_id).first
          path = File.join(@path, up["filename"])
          filename = up["original_filename"]
          user_id = user_id_from_imported_user_id(row[:sender_id]) || Discourse.system_user.id

          if File.exist?(path)
            upload = create_upload(user_id, path, filename)
            raw += html_for_upload(upload, filename) if upload&.persisted?
          end
        end
      end

      @db.insert_pm_topic(
        id: row[:id],
        title: title,
        raw: raw,
        category_id: row[:group_id],
        closed: row[:closed] == "TRUE" ? 1 : 0,
        target_users: row[:participants].gsub("user:", ""),
        user_id: row[:sender_id],
        created_at: parse_datetime(row[:created_at]),
      )
    end

    # get posts from messages
    csv_parse("Messages") do |row|
      next if row[:thread_id] == row[:id]
      next unless row[:deleted_at].blank?
      next if row[:in_private_conversation] == "true"

      @db.insert_post(
        id: row[:id],
        raw: "#{row[:body]}\n#{row[:attachments]}",
        topic_id: row[:thread_id],
        reply_to_post_id: row[:replied_to_id],
        user_id: row[:sender_id],
        created_at: parse_datetime(row[:created_at]),
      )
    end

    # get pm posts from messages
    csv_parse("Messages") do |row|
      next if row[:thread_id] == row[:id]
      next unless row[:deleted_at].blank?
      next unless row[:in_private_conversation] == "true"

      @db.insert_pm_post(
        id: row[:id],
        raw: "#{row[:body]}\n#{row[:attachments]}",
        topic_id: row[:thread_id],
        reply_to_post_id: row[:replied_to_id],
        user_id: row[:sender_id],
        created_at: parse_datetime(row[:created_at]),
      )
    end

    #@db.delete_unused_users
    @db.sort_posts_by_created_at
  end

  def parse_datetime(text)
    return nil if text.blank? || text == "null"

    DateTime.parse(text)
  end

  def import_categories
    puts "", "creating categories"

    parent_category = nil
    if !PARENT_CATEGORY_NAME.blank?
      parent_category = Category.find_by(name: PARENT_CATEGORY_NAME)
      parent_category =
        Category.create(
          name: PARENT_CATEGORY_NAME,
          user_id: Discourse.system_user.id,
        ) unless parent_category
    end

    if IMPORT_GROUPS_AS_TAGS
      @tag_map = {}
      rows = @db.fetch_categories
      rows.each { |row| @tag_map[row["id"]] = row["name"] }
    else
      rows = @db.fetch_categories
      create_categories(rows) do |row|
        {
          id: row["id"],
          name: row["name"],
          description: row["description"],
          position: row["position"],
          parent_category_id: parent_category&.id,
        }
      end
    end
  end

  def batches
    super(BATCH_SIZE)
  end

  def import_users
    puts "", "creating users"
    total_count = @db.count_users
    puts "", "Got #{total_count} users!"

    last_id = ""
    batches do |offset|
      rows, last_id = @db.fetch_users(last_id)
      break if rows.empty?
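      # skip batches that were already fully imported, so the script can resume after an interruption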
      next if all_records_exist?(:users, rows.map { |row| row["id"] })

      create_users(rows, total: total_count, offset: offset) do |row|
        user = User.find_by_email(row["email"].downcase)
        if user
          user.custom_fields["import_id"] = row["id"]
          user.custom_fields["matched_existing"] = "yes"
          user.save
          add_user(row["id"].to_s, user)
          next
        end

        {
          id: row["id"],
          email: row["email"],
          name: row["name"],
          created_at: row["created_at"],
          last_seen_at: row["last_seen_at"],
          active: row["active"] == 1,
        }
      end
    end
  end

  def import_topics
    puts "", "creating topics"

    staff_guardian = Guardian.new(Discourse.system_user)
    total_count = @db.count_topics
    last_id = ""

    batches do |offset|
      rows, last_id = @db.fetch_topics(last_id)
      base_category = Category.find_by(name: PARENT_CATEGORY_NAME)
      break if rows.empty?
      next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row["id"]) })

      create_posts(rows, total: total_count, offset: offset) do |row|
        {
          id: import_topic_id(row["id"]),
          title: (
            if row["title"].present?
              row["title"]
            else
              row["raw"].split(/\W/)[0..(NUM_WORDS_IN_TITLE - 1)].join(" ")
            end
          ),
          raw: normalize_raw(row["raw"]),
          category: (
            if IMPORT_GROUPS_AS_TAGS
              base_category.id
            else
              category_id_from_imported_category_id(row["category_id"])
            end
          ),
          user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
          created_at: row["created_at"],
          closed: row["closed"] == 1,
          post_create_action:
            proc do |post|
              if IMPORT_GROUPS_AS_TAGS
                topic = Topic.find(post.topic_id)
                tag_names = [@tag_map[row["category_id"]]]
                DiscourseTagging.tag_topic_by_names(topic, staff_guardian, tag_names)
              end
            end,
        }
      end
    end
  end

  def import_pm_topics
    puts "", "creating pm topics"

    staff_guardian = Guardian.new(Discourse.system_user)
    total_count = @db.count_pm_topics
    last_id = ""

    batches do |offset|
      rows, last_id = @db.fetch_pm_topics(last_id)
      base_category = Category.find_by(name: PARENT_CATEGORY_NAME)
      break if rows.empty?
      next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row["id"]) })

      create_posts(rows, total: total_count, offset: offset) do |row|
        target_users = []
        row["target_users"]
          .split(",")
          .each do |u|
            user_id = user_id_from_imported_user_id(u)
            next unless user_id
            user = User.find(user_id)
            target_users.append(user.username)
          end
        target_usernames = target_users.join(",")

        {
          id: import_topic_id(row["id"]),
          title: (
            if row["title"].present?
              row["title"]
            else
              row["raw"].split(/\W/)[0..(NUM_WORDS_IN_TITLE - 1)].join(" ")
            end
          ),
          raw: normalize_raw(row["raw"]),
          category: (
            if IMPORT_GROUPS_AS_TAGS
              base_category.id
            else
              category_id_from_imported_category_id(row["category_id"])
            end
          ),
          user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
          created_at: row["created_at"],
          closed: row["closed"] == 1,
          archetype: Archetype.private_message,
          target_usernames: target_usernames,
          post_create_action:
            proc do |post|
              if PM_TAG
                topic = Topic.find(post.topic_id)
                tag_names = [PM_TAG]
                DiscourseTagging.tag_topic_by_names(topic, staff_guardian, tag_names)
              end
            end,
        }
      end
    end
  end

  def import_topic_id(topic_id)
    "T#{topic_id}"
  end

  def import_posts
    puts "", "creating posts"

    total_count = @db.count_posts
    last_row_id = 0

    batches do |offset|
      rows, last_row_id = @db.fetch_sorted_posts(last_row_id)
      break if rows.empty?
      next if all_records_exist?(:posts, rows.map { |row| row["id"] })

      create_posts(rows, total: total_count, offset: offset) do |row|
        topic = topic_lookup_from_imported_post_id(import_topic_id(row["topic_id"]))

        if topic.nil?
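          # the thread's first message was never imported (e.g. deleted or skipped), so log and drop this reply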
p "MISSING TOPIC #{row["topic_id"]}" p row next end { id: row["id"], raw: normalize_raw(row["raw"]), user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id, topic_id: topic[:topic_id], created_at: row["created_at"], } end end end def import_pm_posts puts "", "creating pm posts" total_count = @db.count_pm_posts last_row_id = 0 batches do |offset| rows, last_row_id = @db.fetch_pm_posts(last_row_id) break if rows.empty? next if all_records_exist?(:posts, rows.map { |row| row["id"] }) create_posts(rows, total: total_count, offset: offset) do |row| topic = topic_lookup_from_imported_post_id(import_topic_id(row["topic_id"])) if topic.nil? p "MISSING TOPIC #{row["topic_id"]}" p row next end { id: row["id"], raw: normalize_raw(row["raw"]), user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id, topic_id: topic[:topic_id], created_at: row["created_at"], } end end end def normalize_raw(raw) raw = raw.gsub('\n', "") raw.gsub!(/\[\[user:(\d+)\]\]/) do u = Regexp.last_match(1) user_id = user_id_from_imported_user_id(u) || Discourse.system_user.id if user_id user = User.find(user_id) "@#{user.username}" else u end end raw end def permalink_exists?(url) Permalink.find_by(url: url) end def csv_parse(table_name) CSV.foreach( File.join(@path, "#{table_name}.csv"), headers: true, header_converters: :symbol, skip_blanks: true, encoding: "bom|utf-8", ) { |row| yield row } end end unless ARGV[0] && Dir.exist?(ARGV[0]) puts "", "Usage:", "", "bundle exec ruby script/import_scripts/yammer.rb DIRNAME", "" exit 1 end ImportScripts::Yammer.new(ARGV[0]).perform