diff --git a/script/bulk_import/base.rb b/script/bulk_import/base.rb index 9e5be0ab6a7..4cf7503717c 100644 --- a/script/bulk_import/base.rb +++ b/script/bulk_import/base.rb @@ -224,7 +224,7 @@ class BulkImport::Base def load_indexes puts "Loading groups indexes..." @last_group_id = last_id(Group) - group_names = Group.unscoped.pluck(:name).map(&:downcase).to_set + @group_names_lower = Group.unscoped.pluck(:name).map(&:downcase).to_set puts "Loading users indexes..." @last_user_id = last_id(User) @@ -232,7 +232,7 @@ class BulkImport::Base @last_sso_record_id = last_id(SingleSignOnRecord) @emails = UserEmail.pluck(:email, :user_id).to_h @external_ids = SingleSignOnRecord.pluck(:external_id, :user_id).to_h - @usernames_and_groupnames_lower = User.unscoped.pluck(:username_lower).to_set.merge(group_names) + @usernames_lower = User.unscoped.pluck(:username_lower).to_set @anonymized_user_suffixes = DB.query_single( "SELECT SUBSTRING(username_lower, 5)::BIGINT FROM users WHERE username_lower ~* '^anon\\d+$'", @@ -245,6 +245,9 @@ class BulkImport::Base .to_h @last_user_avatar_id = last_id(UserAvatar) @last_upload_id = last_id(Upload) + @user_ids_by_username_lower = User.unscoped.pluck(:id, :username_lower).to_h + @usernames_by_id = User.unscoped.pluck(:id, :username).to_h + @user_full_names_by_id = User.unscoped.where("name IS NOT NULL").pluck(:id, :name).to_h puts "Loading categories indexes..." @last_category_id = last_id(Category) @@ -354,6 +357,19 @@ class BulkImport::Base @users[id.to_i] end + def user_id_from_original_username(username) + normalized_username = User.normalize_username(@mapped_usernames[username] || username) + @user_ids_by_username_lower[normalized_username] + end + + def username_from_id(id) + @usernames_by_id[id] + end + + def user_full_name_from_id(id) + @user_full_names_by_id[id] + end + def category_id_from_imported_id(id) @categories[id.to_i] end @@ -547,6 +563,8 @@ class BulkImport::Base CATEGORY_TAG_GROUP_COLUMNS ||= %i[category_id tag_group_id created_at updated_at] + CATEGORY_USER_COLUMNS ||= %i[category_id user_id notification_level last_seen_at] + TOPIC_COLUMNS ||= %i[ id archetype @@ -745,6 +763,7 @@ class BulkImport::Base post_id category_id tag_id + user_id external_url created_at updated_at @@ -824,6 +843,10 @@ class BulkImport::Base create_records(rows, "category_tag_group", CATEGORY_TAG_GROUP_COLUMNS, &block) end + def create_category_users(rows, &block) + create_records(rows, "category_user", CATEGORY_USER_COLUMNS, &block) + end + def create_topics(rows, &block) create_records(rows, "topic", TOPIC_COLUMNS, &block) end @@ -925,9 +948,9 @@ class BulkImport::Base group[:name] = fix_name(group[:name]) - unless @usernames_and_groupnames_lower.add?(group[:name].downcase) + if group_or_user_exist?(group[:name]) group_name = group[:name] + "_1" - group_name.next! until @usernames_and_groupnames_lower.add?(group_name.downcase) + group_name.next! while group_or_user_exist?(group_name) group[:name] = group_name end @@ -945,6 +968,12 @@ class BulkImport::Base group end + def group_or_user_exist?(name) + name_lowercase = name.downcase + return true if @usernames_lower.include?(name_lowercase) + @group_names_lower.add?(name_lowercase).nil? + end + def process_user(user) if user[:email].present? user[:email].downcase! @@ -976,9 +1005,9 @@ class BulkImport::Base end # unique username_lower - unless @usernames_and_groupnames_lower.add?(user[:username].downcase) + if user_exist?(user[:username]) username = user[:username] + "_1" - username.next! until @usernames_and_groupnames_lower.add?(username.downcase) + username.next! while user_exist?(username) user[:username] = username end @@ -998,9 +1027,18 @@ class BulkImport::Base user[:date_of_birth] = Date.new(1904, date_of_birth.month, date_of_birth.day) end + @user_ids_by_username_lower[user[:username_lower]] = user[:id] + @usernames_by_id[user[:id]] = user[:username] + @user_full_names_by_id[user[:id]] = user[:name] if user[:name].present? + user end + def user_exist?(username) + username_lowercase = username.downcase + @usernames_lower.add?(username_lowercase).nil? + end + def process_user_email(user_email) user_email[:id] = @last_user_email_id += 1 user_email[:primary] = true @@ -1163,6 +1201,10 @@ class BulkImport::Base category_tag_group end + def process_category_user(category_user) + category_user + end + def process_topic(topic) @topics[topic[:imported_id].to_i] = topic[:id] = @last_topic_id += 1 topic[:archetype] ||= Archetype.default @@ -1682,21 +1724,22 @@ class BulkImport::Base cooked = @markdown.render(cooked).scrub.strip - cooked.gsub!(%r{\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[/QUOTE\]}im) do - username, post_id, topic_id, quote = $1, $2, $3, $4 + cooked.gsub!( + %r{\[QUOTE=(?:"|")?(.+?)(?:, post:(\d+), topic:(\d+))?(?:, username:(.+?))?(?:"|")?\](.+?)\[/QUOTE\]}im, + ) do + name_or_username, post_id, topic_id, username, quote = $1, $2, $3, $4, $5 + username ||= name_or_username quote = quote.scrub.strip quote.gsub!(/^(
\n?)+/, "") quote.gsub!(/(
\n?)+$/, "") - user = User.find_by(username: username) - if post_id.present? && topic_id.present? <<-HTML @@ -1706,7 +1749,7 @@ class BulkImport::Base @@ -1726,8 +1769,8 @@ class BulkImport::Base upload_sha1 = Upload.sha1_from_short_url(short_url) upload_base62 = Upload.base62_sha1(upload_sha1) upload_id = @uploads_by_sha1[upload_sha1] - upload_url = @upload_urls_by_id[upload_id] - cdn_url = Discourse.store.cdn_url(upload_url) + upload_url = upload_id ? @upload_urls_by_id[upload_id] : nil + cdn_url = upload_url ? Discourse.store.cdn_url(upload_url) : "" attributes = +%{loading="lazy"} attributes << %{ alt="#{alt}"} if alt.present? @@ -1744,9 +1787,9 @@ class BulkImport::Base name = @mapped_usernames[$1] || $1 normalized_name = User.normalize_username(name) - if User.where(username_lower: normalized_name).exists? + if @usernames_lower.include?(normalized_name) %|@#{name}| - elsif Group.where("LOWER(name) = ?", normalized_name).exists? + elsif @group_names_lower.include?(normalized_name) %|@#{name}| else "@#{name}" @@ -1761,7 +1804,8 @@ class BulkImport::Base def user_avatar(user) url = user.avatar_template.gsub("{size}", "45") - "\"\" #{user.username}" + # TODO name/username preference check + "\"\" #{user.name.presence || user.username}" end def pre_fancy(title) diff --git a/script/bulk_import/generic_bulk.rb b/script/bulk_import/generic_bulk.rb index f3ad6e6227f..37ccf695f22 100644 --- a/script/bulk_import/generic_bulk.rb +++ b/script/bulk_import/generic_bulk.rb @@ -72,6 +72,7 @@ class BulkImport::Generic < BulkImport::Base import_category_custom_fields import_category_tag_groups import_category_permissions + import_category_users import_topics import_posts @@ -315,6 +316,33 @@ class BulkImport::Generic < BulkImport::Base permissions.close end + def import_category_users + puts "", "Importing category users..." + + category_users = query(<<~SQL) + SELECT * + FROM category_users + ORDER BY category_id, user_id + SQL + + existing_category_user_ids = CategoryUser.pluck(:category_id, :user_id).to_set + + create_category_users(category_users) do |row| + category_id = category_id_from_imported_id(row["category_id"]) + user_id = user_id_from_imported_id(row["user_id"]) + next if existing_category_user_ids.include?([category_id, user_id]) + + { + category_id: category_id, + user_id: user_id, + notification_level: row["notification_level"], + last_seen_at: to_datetime(row["last_seen_at"]), + } + end + + category_users.close + end + def import_groups puts "", "Importing groups..." @@ -465,9 +493,12 @@ class BulkImport::Generic < BulkImport::Base users = query(<<~SQL) SELECT id, timezone, email_level, email_messages_level, email_digests - FROM users - WHERE timezone IS NOT NULL - ORDER BY id + FROM users + WHERE timezone IS NOT NULL + OR email_level IS NOT NULL + OR email_messages_level IS NOT NULL + OR email_digests IS NOT NULL + ORDER BY id SQL existing_user_ids = UserOption.pluck(:user_id).to_set @@ -502,6 +533,8 @@ class BulkImport::Generic < BulkImport::Base user_fields.each do |row| next if existing_user_field_names.include?(row["name"]) + # TODO: Use `id` and store it in mapping table, but for now just ignore it. + row.delete("id") options = row.delete("options") field = UserField.create!(row) @@ -647,12 +680,10 @@ class BulkImport::Generic < BulkImport::Base posts = query(<<~SQL) SELECT * FROM posts - ORDER BY topic_id, id + ORDER BY topic_id, post_number, id SQL group_names = Group.pluck(:id, :name).to_h - # TODO: Investigate feasibility of loading all users on large sites - user_names = User.pluck(:id, :username).to_h create_posts(posts) do |row| next if row["raw"].blank? @@ -667,7 +698,7 @@ class BulkImport::Generic < BulkImport::Base topic_id: topic_id, user_id: user_id_from_imported_id(row["user_id"]), created_at: to_datetime(row["created_at"]), - raw: post_raw(row, group_names, user_names), + raw: post_raw(row, group_names), like_count: row["like_count"], reply_to_post_number: row["reply_to_post_id"] ? post_number_from_imported_id(row["reply_to_post_id"]) : nil, @@ -677,7 +708,7 @@ class BulkImport::Generic < BulkImport::Base posts.close end - def post_raw(row, group_names, user_names) + def post_raw(row, group_names) raw = row["raw"] placeholders = row["placeholders"]&.then { |json| JSON.parse(json) } @@ -706,13 +737,23 @@ class BulkImport::Generic < BulkImport::Base mentions.each do |mention| name = if mention["type"] == "user" - user_names[user_id_from_imported_id(mention["id"])] + if mention["id"] + username_from_id(user_id_from_imported_id(mention["id"])) + elsif mention["name"] + user_id = user_id_from_original_username(mention["name"]) + user_id ? username_from_id(user_id) : mention["name"] + end elsif mention["type"] == "group" - group_names[group_id_from_imported_id(mention["id"])] + if mention["id"] + group_id = group_id_from_imported_id(mention["id"]) + group_id ? group_names[group_id] : mention["name"] + else + mention["name"] + end end - puts "#{mention["type"]} not found -- #{mention["id"]}" unless name - raw.gsub!(mention["placeholder"], "@#{name}") + puts "#{mention["type"]} not found -- #{mention["placeholder"]}" unless name + raw.gsub!(mention["placeholder"], " @#{name} ") end end @@ -726,6 +767,72 @@ class BulkImport::Generic < BulkImport::Base raw.gsub!(event["placeholder"], event_bbcode(event_details)) if event_details end + if (quotes = placeholders&.fetch("quotes", nil)) + quotes.each do |quote| + user_id = + if quote["user_id"] + user_id_from_imported_id(quote["user_id"]) + elsif quote["username"] + user_id_from_original_username(quote["username"]) + end + + username = quote["username"] + name = nil + + if user_id + username = username_from_id(user_id) + name = user_full_name_from_id(user_id) + end + + bbcode = + if username.present? && name.present? + %Q|[quote="#{name}, username:#{username}"]| + elsif username.present? + %Q|[quote="#{username}"]| + else + "[quote]" + end + + raw.gsub!(quote["placeholder"], bbcode) + end + end + + if (links = placeholders&.fetch("links", nil)) + links.each do |link| + text = link["text"] + original_url = link["url"] + + markdown = + if link["topic_id"] + topic_id = topic_id_from_imported_id(link["topic_id"]) + url = topic_id ? "#{Discourse.base_url}/t/#{topic_id}" : original_url + text ? "[#{text}](#{url})" : url + elsif link["post_id"] + topic_id = topic_id_from_imported_post_id(link["post_id"]) + post_number = post_number_from_imported_id(link["post_id"]) + url = + ( + if topic_id && post_number + "#{Discourse.base_url}/t/#{topic_id}/#{post_number}" + else + original_url + end + ) + text ? "[#{text}](#{url})" : url + else + text ? "[#{text}](#{original_url})" : original_url + end + + # ensure that the placeholder is surrounded by whitespace unless it's at the beginning or end of the string + placeholder = link["placeholder"] + escaped_placeholder = Regexp.escape(placeholder) + raw.gsub!(/(?