DEV: Improve generic import script (#25972)

* FEATURE: Import into `category_users` table
* FIX: Failed to import `user_options` unless `timezone` was set
* FIX: Prevent reusing original `id` from intermediate DB in `user_fields`
* FEATURE: Order posts by `post_number` if available
* FEATURE: Allow `[mention]` placeholder to reference users by "id" or "name" (username)
* FEATURE: Support `[quote]` placeholders in posts
* FEATURE: Support `[link]` placeholders in posts
* FEATURE: Support all kinds of permalinks and remove support for `old_relative_url`
* PERF: Speed up pre-cooking by removing DB lookups
commit bc98740205 (parent 5c1147adf3)
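The placeholder features listed above are driven by a `placeholders` JSON column on each post row in the intermediate database. The sketch below is illustrative only: the top-level `quotes` and `links` keys and the per-entry keys (`type`, `id`, `name`, `user_id`, `username`, `topic_id`, `post_id`, `text`, `url`, `placeholder`) are the ones the script reads in the diff below, while the `mentions` key is assumed by analogy and every value, including the placeholder token format, is invented.

require "json"

# Hypothetical shape of one post's `placeholders` column (values are made up).
placeholders = {
  "mentions" => [
    { "type" => "user", "id" => "42", "placeholder" => "[mention:1]" },           # resolved via imported id
    { "type" => "user", "name" => "old_handle", "placeholder" => "[mention:2]" }, # resolved via original username
    { "type" => "group", "id" => "7", "name" => "team", "placeholder" => "[mention:3]" },
  ],
  "quotes" => [{ "user_id" => "42", "username" => "old_handle", "placeholder" => "[quote:1]" }],
  "links" => [
    { "topic_id" => "100", "text" => "See this topic", "placeholder" => "[link:1]" },
    { "url" => "https://example.com/page", "placeholder" => "[link:2]" },
  ],
}

JSON.generate(placeholders) # what a converter would store in posts.placeholders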
@@ -224,7 +224,7 @@ class BulkImport::Base
   def load_indexes
     puts "Loading groups indexes..."
     @last_group_id = last_id(Group)
-    group_names = Group.unscoped.pluck(:name).map(&:downcase).to_set
+    @group_names_lower = Group.unscoped.pluck(:name).map(&:downcase).to_set

     puts "Loading users indexes..."
     @last_user_id = last_id(User)
@@ -232,7 +232,7 @@ class BulkImport::Base
     @last_sso_record_id = last_id(SingleSignOnRecord)
     @emails = UserEmail.pluck(:email, :user_id).to_h
     @external_ids = SingleSignOnRecord.pluck(:external_id, :user_id).to_h
-    @usernames_and_groupnames_lower = User.unscoped.pluck(:username_lower).to_set.merge(group_names)
+    @usernames_lower = User.unscoped.pluck(:username_lower).to_set
     @anonymized_user_suffixes =
       DB.query_single(
         "SELECT SUBSTRING(username_lower, 5)::BIGINT FROM users WHERE username_lower ~* '^anon\\d+$'",
@@ -245,6 +245,9 @@ class BulkImport::Base
         .to_h
     @last_user_avatar_id = last_id(UserAvatar)
     @last_upload_id = last_id(Upload)
+    @user_ids_by_username_lower = User.unscoped.pluck(:id, :username_lower).to_h
+    @usernames_by_id = User.unscoped.pluck(:id, :username).to_h
+    @user_full_names_by_id = User.unscoped.where("name IS NOT NULL").pluck(:id, :name).to_h

     puts "Loading categories indexes..."
     @last_category_id = last_id(Category)
@@ -354,6 +357,19 @@ class BulkImport::Base
     @users[id.to_i]
   end

+  def user_id_from_original_username(username)
+    normalized_username = User.normalize_username(@mapped_usernames[username] || username)
+    @user_ids_by_username_lower[normalized_username]
+  end
+
+  def username_from_id(id)
+    @usernames_by_id[id]
+  end
+
+  def user_full_name_from_id(id)
+    @user_full_names_by_id[id]
+  end
+
   def category_id_from_imported_id(id)
     @categories[id.to_i]
   end
@@ -547,6 +563,8 @@ class BulkImport::Base

   CATEGORY_TAG_GROUP_COLUMNS ||= %i[category_id tag_group_id created_at updated_at]

+  CATEGORY_USER_COLUMNS ||= %i[category_id user_id notification_level last_seen_at]
+
   TOPIC_COLUMNS ||= %i[
     id
     archetype
@@ -745,6 +763,7 @@ class BulkImport::Base
     post_id
     category_id
     tag_id
+    user_id
     external_url
     created_at
     updated_at
@@ -824,6 +843,10 @@ class BulkImport::Base
     create_records(rows, "category_tag_group", CATEGORY_TAG_GROUP_COLUMNS, &block)
   end

+  def create_category_users(rows, &block)
+    create_records(rows, "category_user", CATEGORY_USER_COLUMNS, &block)
+  end
+
   def create_topics(rows, &block)
     create_records(rows, "topic", TOPIC_COLUMNS, &block)
   end
@@ -925,9 +948,9 @@ class BulkImport::Base

     group[:name] = fix_name(group[:name])

-    unless @usernames_and_groupnames_lower.add?(group[:name].downcase)
+    if group_or_user_exist?(group[:name])
       group_name = group[:name] + "_1"
-      group_name.next! until @usernames_and_groupnames_lower.add?(group_name.downcase)
+      group_name.next! while group_or_user_exist?(group_name)
       group[:name] = group_name
     end

@@ -945,6 +968,12 @@ class BulkImport::Base
     group
   end

+  def group_or_user_exist?(name)
+    name_lowercase = name.downcase
+    return true if @usernames_lower.include?(name_lowercase)
+    @group_names_lower.add?(name_lowercase).nil?
+  end
+
   def process_user(user)
     if user[:email].present?
       user[:email].downcase!
@@ -976,9 +1005,9 @@ class BulkImport::Base
     end

     # unique username_lower
-    unless @usernames_and_groupnames_lower.add?(user[:username].downcase)
+    if user_exist?(user[:username])
       username = user[:username] + "_1"
-      username.next! until @usernames_and_groupnames_lower.add?(username.downcase)
+      username.next! while user_exist?(username)
       user[:username] = username
     end

@@ -998,9 +1027,18 @@ class BulkImport::Base
       user[:date_of_birth] = Date.new(1904, date_of_birth.month, date_of_birth.day)
     end

+    @user_ids_by_username_lower[user[:username_lower]] = user[:id]
+    @usernames_by_id[user[:id]] = user[:username]
+    @user_full_names_by_id[user[:id]] = user[:name] if user[:name].present?
+
     user
   end

+  def user_exist?(username)
+    username_lowercase = username.downcase
+    @usernames_lower.add?(username_lowercase).nil?
+  end
+
   def process_user_email(user_email)
     user_email[:id] = @last_user_email_id += 1
     user_email[:primary] = true
@@ -1163,6 +1201,10 @@ class BulkImport::Base
     category_tag_group
   end

+  def process_category_user(category_user)
+    category_user
+  end
+
   def process_topic(topic)
     @topics[topic[:imported_id].to_i] = topic[:id] = @last_topic_id += 1
     topic[:archetype] ||= Archetype.default
@@ -1682,21 +1724,22 @@ class BulkImport::Base

     cooked = @markdown.render(cooked).scrub.strip

-    cooked.gsub!(%r{\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[/QUOTE\]}im) do
-      username, post_id, topic_id, quote = $1, $2, $3, $4
+    cooked.gsub!(
+      %r{\[QUOTE=(?:"|&quot;)?(.+?)(?:, post:(\d+), topic:(\d+))?(?:, username:(.+?))?(?:"|&quot;)?\](.+?)\[/QUOTE\]}im,
+    ) do
+      name_or_username, post_id, topic_id, username, quote = $1, $2, $3, $4, $5
+      username ||= name_or_username

       quote = quote.scrub.strip
       quote.gsub!(/^(<br>\n?)+/, "")
       quote.gsub!(/(<br>\n?)+$/, "")

-      user = User.find_by(username: username)
-
       if post_id.present? && topic_id.present?
         <<-HTML
           <aside class="quote" data-post="#{post_id}" data-topic="#{topic_id}">
             <div class="title">
               <div class="quote-controls"></div>
-              #{user ? user_avatar(user) : username}:
+              #{name_or_username}:
             </div>
             <blockquote>#{quote}</blockquote>
           </aside>
@@ -1706,7 +1749,7 @@ class BulkImport::Base
           <aside class="quote no-group" data-username="#{username}">
             <div class="title">
               <div class="quote-controls"></div>
-              #{user ? user_avatar(user) : username}:
+              #{name_or_username}:
             </div>
             <blockquote>#{quote}</blockquote>
           </aside>
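For reference, this is the kind of quote markup the widened pre-cook pattern above now accepts; the sample text and ids are invented, and the captures correspond to `name_or_username`, the post and topic references, `username`, and the quoted body.

# Illustration only; the pattern is copied from the diff above, the sample is made up.
quote_pattern =
  %r{\[QUOTE=(?:"|&quot;)?(.+?)(?:, post:(\d+), topic:(\d+))?(?:, username:(.+?))?(?:"|&quot;)?\](.+?)\[/QUOTE\]}im

sample = %{[QUOTE="Alice Doe, post:3, topic:12, username:alice"]Hello there[/QUOTE]}
match = sample.match(quote_pattern)
match[1] # => "Alice Doe"   (name or username shown in the quote header)
match[2] # => "3"           (post reference)
match[3] # => "12"          (topic reference)
match[4] # => "alice"       (username, newly captured in this commit)
match[5] # => "Hello there" (quoted body)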
@@ -1726,8 +1769,8 @@ class BulkImport::Base
       upload_sha1 = Upload.sha1_from_short_url(short_url)
       upload_base62 = Upload.base62_sha1(upload_sha1)
       upload_id = @uploads_by_sha1[upload_sha1]
-      upload_url = @upload_urls_by_id[upload_id]
-      cdn_url = Discourse.store.cdn_url(upload_url)
+      upload_url = upload_id ? @upload_urls_by_id[upload_id] : nil
+      cdn_url = upload_url ? Discourse.store.cdn_url(upload_url) : ""

       attributes = +%{loading="lazy"}
       attributes << %{ alt="#{alt}"} if alt.present?
@@ -1744,9 +1787,9 @@ class BulkImport::Base
       name = @mapped_usernames[$1] || $1
       normalized_name = User.normalize_username(name)

-      if User.where(username_lower: normalized_name).exists?
+      if @usernames_lower.include?(normalized_name)
         %|<a class="mention" href="/u/#{normalized_name}">@#{name}</a>|
-      elsif Group.where("LOWER(name) = ?", normalized_name).exists?
+      elsif @group_names_lower.include?(normalized_name)
         %|<a class="mention-group" href="/groups/#{normalized_name}">@#{name}</a>|
       else
         "@#{name}"
@@ -1761,7 +1804,8 @@ class BulkImport::Base

   def user_avatar(user)
     url = user.avatar_template.gsub("{size}", "45")
-    "<img alt=\"\" width=\"20\" height=\"20\" src=\"#{url}\" class=\"avatar\"> #{user.username}"
+    # TODO name/username preference check
+    "<img alt=\"\" width=\"20\" height=\"20\" src=\"#{url}\" class=\"avatar\"> #{user.name.presence || user.username}"
   end

   def pre_fancy(title)
@@ -72,6 +72,7 @@ class BulkImport::Generic < BulkImport::Base
     import_category_custom_fields
     import_category_tag_groups
     import_category_permissions
+    import_category_users

     import_topics
     import_posts
@@ -315,6 +316,33 @@ class BulkImport::Generic < BulkImport::Base
     permissions.close
   end

+  def import_category_users
+    puts "", "Importing category users..."
+
+    category_users = query(<<~SQL)
+      SELECT *
+      FROM category_users
+      ORDER BY category_id, user_id
+    SQL
+
+    existing_category_user_ids = CategoryUser.pluck(:category_id, :user_id).to_set
+
+    create_category_users(category_users) do |row|
+      category_id = category_id_from_imported_id(row["category_id"])
+      user_id = user_id_from_imported_id(row["user_id"])
+      next if existing_category_user_ids.include?([category_id, user_id])
+
+      {
+        category_id: category_id,
+        user_id: user_id,
+        notification_level: row["notification_level"],
+        last_seen_at: to_datetime(row["last_seen_at"]),
+      }
+    end
+
+    category_users.close
+  end
+
   def import_groups
     puts "", "Importing groups..."

@@ -465,9 +493,12 @@ class BulkImport::Generic < BulkImport::Base

     users = query(<<~SQL)
       SELECT id, timezone, email_level, email_messages_level, email_digests
       FROM users
       WHERE timezone IS NOT NULL
-      ORDER BY id
+         OR email_level IS NOT NULL
+         OR email_messages_level IS NOT NULL
+         OR email_digests IS NOT NULL
+      ORDER BY id
     SQL

     existing_user_ids = UserOption.pluck(:user_id).to_set
@@ -502,6 +533,8 @@ class BulkImport::Generic < BulkImport::Base
     user_fields.each do |row|
       next if existing_user_field_names.include?(row["name"])

+      # TODO: Use `id` and store it in mapping table, but for now just ignore it.
+      row.delete("id")
       options = row.delete("options")
       field = UserField.create!(row)

@@ -647,12 +680,10 @@ class BulkImport::Generic < BulkImport::Base
     posts = query(<<~SQL)
       SELECT *
       FROM posts
-      ORDER BY topic_id, id
+      ORDER BY topic_id, post_number, id
     SQL

     group_names = Group.pluck(:id, :name).to_h
-    # TODO: Investigate feasibility of loading all users on large sites
-    user_names = User.pluck(:id, :username).to_h

     create_posts(posts) do |row|
       next if row["raw"].blank?
@@ -667,7 +698,7 @@ class BulkImport::Generic < BulkImport::Base
         topic_id: topic_id,
         user_id: user_id_from_imported_id(row["user_id"]),
         created_at: to_datetime(row["created_at"]),
-        raw: post_raw(row, group_names, user_names),
+        raw: post_raw(row, group_names),
         like_count: row["like_count"],
         reply_to_post_number:
           row["reply_to_post_id"] ? post_number_from_imported_id(row["reply_to_post_id"]) : nil,
@@ -677,7 +708,7 @@ class BulkImport::Generic < BulkImport::Base
     posts.close
   end

-  def post_raw(row, group_names, user_names)
+  def post_raw(row, group_names)
     raw = row["raw"]
     placeholders = row["placeholders"]&.then { |json| JSON.parse(json) }

@@ -706,13 +737,23 @@ class BulkImport::Generic < BulkImport::Base
       mentions.each do |mention|
         name =
           if mention["type"] == "user"
-            user_names[user_id_from_imported_id(mention["id"])]
+            if mention["id"]
+              username_from_id(user_id_from_imported_id(mention["id"]))
+            elsif mention["name"]
+              user_id = user_id_from_original_username(mention["name"])
+              user_id ? username_from_id(user_id) : mention["name"]
+            end
           elsif mention["type"] == "group"
-            group_names[group_id_from_imported_id(mention["id"])]
+            if mention["id"]
+              group_id = group_id_from_imported_id(mention["id"])
+              group_id ? group_names[group_id] : mention["name"]
+            else
+              mention["name"]
+            end
           end

-        puts "#{mention["type"]} not found -- #{mention["id"]}" unless name
-        raw.gsub!(mention["placeholder"], "@#{name}")
+        puts "#{mention["type"]} not found -- #{mention["placeholder"]}" unless name
+        raw.gsub!(mention["placeholder"], " @#{name} ")
       end
     end

@@ -726,6 +767,72 @@ class BulkImport::Generic < BulkImport::Base
       raw.gsub!(event["placeholder"], event_bbcode(event_details)) if event_details
     end

+    if (quotes = placeholders&.fetch("quotes", nil))
+      quotes.each do |quote|
+        user_id =
+          if quote["user_id"]
+            user_id_from_imported_id(quote["user_id"])
+          elsif quote["username"]
+            user_id_from_original_username(quote["username"])
+          end
+
+        username = quote["username"]
+        name = nil
+
+        if user_id
+          username = username_from_id(user_id)
+          name = user_full_name_from_id(user_id)
+        end
+
+        bbcode =
+          if username.present? && name.present?
+            %Q|[quote="#{name}, username:#{username}"]|
+          elsif username.present?
+            %Q|[quote="#{username}"]|
+          else
+            "[quote]"
+          end
+
+        raw.gsub!(quote["placeholder"], bbcode)
+      end
+    end
+
+    if (links = placeholders&.fetch("links", nil))
+      links.each do |link|
+        text = link["text"]
+        original_url = link["url"]
+
+        markdown =
+          if link["topic_id"]
+            topic_id = topic_id_from_imported_id(link["topic_id"])
+            url = topic_id ? "#{Discourse.base_url}/t/#{topic_id}" : original_url
+            text ? "[#{text}](#{url})" : url
+          elsif link["post_id"]
+            topic_id = topic_id_from_imported_post_id(link["post_id"])
+            post_number = post_number_from_imported_id(link["post_id"])
+            url =
+              (
+                if topic_id && post_number
+                  "#{Discourse.base_url}/t/#{topic_id}/#{post_number}"
+                else
+                  original_url
+                end
+              )
+            text ? "[#{text}](#{url})" : url
+          else
+            text ? "[#{text}](#{original_url})" : original_url
+          end
+
+        # ensure that the placeholder is surrounded by whitespace unless it's at the beginning or end of the string
+        placeholder = link["placeholder"]
+        escaped_placeholder = Regexp.escape(placeholder)
+        raw.gsub!(/(?<!\s)#{escaped_placeholder}/, " #{placeholder}")
+        raw.gsub!(/#{escaped_placeholder}(?!\s)/, "#{placeholder} ")
+
+        raw.gsub!(placeholder, markdown)
+      end
+    end
+
     if row["upload_ids"].present? && @uploads_db
       upload_ids = JSON.parse(row["upload_ids"])
       upload_ids_placeholders = (["?"] * upload_ids.size).join(",")
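As a usage note, the quote placeholders above end up as Discourse quote BBCode. A minimal sketch of the three possible outcomes, using plain truthiness where the script uses `present?`, with an invented helper name and invented values:

# quote_bbcode is a hypothetical helper that mirrors the branch added above.
def quote_bbcode(username, name)
  if username && name
    %Q|[quote="#{name}, username:#{username}"]|
  elsif username
    %Q|[quote="#{username}"]|
  else
    "[quote]"
  end
end

quote_bbcode("alice", "Alice Doe") # => [quote="Alice Doe, username:alice"]
quote_bbcode("alice", nil)         # => [quote="alice"]
quote_bbcode(nil, nil)             # => [quote]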
@@ -2061,110 +2168,86 @@ class BulkImport::Generic < BulkImport::Base
   end

   def import_permalinks
-    puts "", "Importing permalinks for topics..."
+    puts "", "Importing permalinks..."

     rows = query(<<~SQL)
-      SELECT id, old_relative_url
-      FROM topics
-      WHERE old_relative_url IS NOT NULL
-      ORDER BY id
-    SQL
-
-    existing_permalinks = Permalink.where("topic_id IS NOT NULL").pluck(:topic_id).to_set
-
-    create_permalinks(rows) do |row|
-      topic_id = topic_id_from_imported_id(row["id"])
-      next if !topic_id || existing_permalinks.include?(topic_id)
-
-      { url: row["old_relative_url"], topic_id: topic_id }
-    end
-
-    rows.close
-
-    puts "", "Importing permalinks for posts..."
-
-    rows = query(<<~SQL)
-      SELECT id, old_relative_url
-      FROM posts
-      WHERE old_relative_url IS NOT NULL
-      ORDER BY id
-    SQL
-
-    existing_permalinks = Permalink.where("post_id IS NOT NULL").pluck(:post_id).to_set
-
-    create_permalinks(rows) do |row|
-      post_id = post_id_from_imported_id(row["id"])
-      next if !post_id || existing_permalinks.include?(post_id)
-
-      { url: row["old_relative_url"], post_id: post_id }
-    end
-
-    rows.close
-
-    puts "", "Importing permalinks for categories..."
-
-    rows = query(<<~SQL)
-      SELECT id, old_relative_url
-      FROM categories
-      WHERE old_relative_url IS NOT NULL
-      ORDER BY id
-    SQL
-
-    existing_permalinks = Permalink.where("category_id IS NOT NULL").pluck(:category_id).to_set
-
-    create_permalinks(rows) do |row|
-      category_id = category_id_from_imported_id(row["id"])
-      next if !category_id || existing_permalinks.include?(category_id)
-
-      { url: row["old_relative_url"], category_id: category_id }
-    end
-
-    rows.close
-
-    if @tag_mapping
-      puts "", "Importing permalinks for tags..."
-
-      rows = query(<<~SQL)
-        SELECT id, old_relative_url
-        FROM tags
-        WHERE old_relative_url IS NOT NULL
-        ORDER BY id
-      SQL
-
-      existing_permalinks = Permalink.where("tag_id IS NOT NULL").pluck(:tag_id).to_set
-
-      create_permalinks(rows) do |row|
-        tag_id = @tag_mapping[row["id"]]
-        next if !tag_id || existing_permalinks.include?(tag_id)
-
-        { url: row["old_relative_url"], tag_id: tag_id }
-      end
-
-      rows.close
-    else
-      puts " Skipping import of topic tags because tags have not been imported."
-    end
-
-    puts "", "Importing permalinks for external/relative URLs..."
-
-    rows = query(<<~SQL)
-      SELECT url, external_url
+      SELECT *
       FROM permalinks
-      WHERE external_url IS NOT NULL
       ORDER BY url
     SQL

-    existing_permalinks = Permalink.where("external_url IS NOT NULL").pluck(:external_url).to_set
+    existing_permalinks = Permalink.pluck(:url).to_set

+    if !@tag_mapping
+      puts "Skipping import of permalinks for tags because tags have not been imported."
+    end
+
     create_permalinks(rows) do |row|
-      next if existing_permalinks.include?(row["external_url"])
+      next if existing_permalinks.include?(row["url"])

-      { url: row["url"], external_url: row["external_url"] }
+      if row["topic_id"]
+        topic_id = topic_id_from_imported_id(row["topic_id"])
+        next unless topic_id
+        { url: row["url"], topic_id: topic_id }
+      elsif row["post_id"]
+        post_id = post_id_from_imported_id(row["post_id"])
+        next unless post_id
+        { url: row["url"], post_id: post_id }
+      elsif row["category_id"]
+        category_id = category_id_from_imported_id(row["category_id"])
+        next unless category_id
+        { url: row["url"], category_id: category_id }
+      elsif row["tag_id"]
+        next unless @tag_mapping
+        tag_id = @tag_mapping[row["tag_id"]]
+        next unless tag_id
+        { url: row["url"], tag_id: tag_id }
+      elsif row["user_id"]
+        user_id = user_id_from_imported_id(row["user_id"])
+        next unless user_id
+        { url: row["url"], user_id: user_id }
+      elsif row["external_url"]
+        external_url = calculate_external_url(row)
+        next unless external_url
+        { url: row["url"], external_url: external_url }
+      end
     end

     rows.close
   end

+  def calculate_external_url(row)
+    external_url = row["external_url"]
+    placeholders = row["external_url_placeholders"]&.then { |json| JSON.parse(json) }
+    return external_url unless placeholders
+
+    placeholders.each do |placeholder|
+      case placeholder["type"]
+      when "category_url"
+        category_id = category_id_from_imported_id(placeholder["id"])
+        category = Category.find(category_id)
+        external_url.gsub!(
+          placeholder["placeholder"],
+          "c/#{category.slug_path.join("/")}/#{category.id}",
+        )
+      when "category_slug_ref"
+        category_id = category_id_from_imported_id(placeholder["id"])
+        category = Category.find(category_id)
+        external_url.gsub!(placeholder["placeholder"], category.slug_ref)
+      when "tag_name"
+        if @tag_mapping
+          tag_id = @tag_mapping[placeholder["id"]]
+          tag = Tag.find(tag_id)
+          external_url.gsub!(placeholder["placeholder"], tag.name)
+        end
+      else
+        raise "Unknown placeholder type: #{placeholder[:type]}"
+      end
+    end
+
+    external_url
+  end
+
   def create_connection(path)
     sqlite = SQLite3::Database.new(path, results_as_hash: true)
     sqlite.busy_timeout = 60_000 # 60 seconds
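For context, a permalink row carrying an external URL placeholder might look like the sketch below. Only the column and key names are taken from `calculate_external_url` above; the token format, ids, and URLs are invented.

require "json"

# Hypothetical intermediate-DB permalink row (values are made up).
row = {
  "url" => "old-forum/category/10-announcements",
  "external_url" => "/[category_url:10]",
  "external_url_placeholders" =>
    JSON.generate([{ "type" => "category_url", "id" => "10", "placeholder" => "[category_url:10]" }]),
}

# calculate_external_url(row) would look up the Discourse category mapped from
# imported id "10" and rewrite the placeholder to "c/<slug path>/<category id>".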