DEV: minor improvements in the vanilla import script. (#14026)

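The post raw is now parsed according to each record's `Format` value (`html`, `rich`, or `markdown`); any other or missing format falls back to the previous behaviour.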
This commit is contained in:
Vinoth Kannan 2021-08-12 15:07:44 +05:30 committed by GitHub
parent aed65ec16d
commit cd9262b7d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 21 additions and 5 deletions

View File

@ -336,7 +336,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
user_id: user_id, user_id: user_id,
title: discussion['Name'], title: discussion['Name'],
category: category_id_from_imported_category_id(discussion['CategoryID']) || @category_mappings[discussion['CategoryID']].try(:[], :category_id), category: category_id_from_imported_category_id(discussion['CategoryID']) || @category_mappings[discussion['CategoryID']].try(:[], :category_id),
raw: @vb_parser ? VanillaBodyParser.new(discussion, user_id).parse : process_raw(discussion['Body']), raw: get_raw(discussion, user_id),
views: discussion['CountViews'] || 0, views: discussion['CountViews'] || 0,
closed: discussion['Closed'] == 1, closed: discussion['Closed'] == 1,
pinned_at: discussion['Announce'] == 0 ? nil : Time.zone.at(discussion['DateLastComment'] || discussion['DateInserted']), pinned_at: discussion['Announce'] == 0 ? nil : Time.zone.at(discussion['DateLastComment'] || discussion['DateInserted']),
@ -381,7 +381,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
id: "comment#" + comment['CommentID'].to_s, id: "comment#" + comment['CommentID'].to_s,
user_id: user_id, user_id: user_id,
topic_id: t[:topic_id], topic_id: t[:topic_id],
raw: @vb_parser ? VanillaBodyParser.new(comment, user_id).parse : process_raw(comment['Body']), raw: get_raw(comment, user_id),
created_at: Time.zone.at(comment['DateInserted']) created_at: Time.zone.at(comment['DateInserted'])
} }
@ -449,7 +449,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
create_posts(messages, total: total_count, offset: offset) do |message| create_posts(messages, total: total_count, offset: offset) do |message|
user_id = user_id_from_imported_user_id(message['InsertUserID']) || Discourse::SYSTEM_USER_ID user_id = user_id_from_imported_user_id(message['InsertUserID']) || Discourse::SYSTEM_USER_ID
body = @vb_parser ? VanillaBodyParser.new(message, user_id).parse : process_raw(message['Body']) body = get_raw(message, user_id)
common = { common = {
user_id: user_id, user_id: user_id,
@ -486,14 +486,30 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
end end
end end
# Picks the body-conversion path for an imported record from its 'Format' value.
#
# record  - hash-like row; this method reads record['Format'] and record['Body'].
# user_id - passed, together with the record, to VanillaBodyParser.new.
#
# Returns whatever the selected converter returns (process_raw or
# VanillaBodyParser#parse).
#
# NOTE(review): the next line shows the removed `def process_raw(raw)` header
# beside the added `def get_raw` header - looks like side-by-side diff residue; confirm.
def process_raw(raw) def get_raw(record, user_id)
# A nil Format becomes "" and therefore lands in the else branch; matching is case-insensitive.
format = (record['Format'] || "").downcase
body = record['Body']
case format
when "html"
# HTML bodies go through process_raw with its default options.
process_raw(body)
when "rich"
# Rich records are handed to VanillaBodyParser whole, without consulting @vb_parser.
VanillaBodyParser.new(record, user_id).parse
when "markdown"
# Body is already Markdown: still run process_raw, but skip its ReverseMarkdown conversion.
process_raw(body, skip_reverse_markdown: true)
else
# Any other or missing format keeps the earlier rule: @vb_parser decides which converter runs.
@vb_parser ? VanillaBodyParser.new(record, user_id).parse : process_raw(body)
end
end
def process_raw(raw, skip_reverse_markdown: false)
return if raw == nil return if raw == nil
raw = @htmlentities.decode(raw) raw = @htmlentities.decode(raw)
# convert user profile links to user mentions # convert user profile links to user mentions
raw.gsub!(/<a.*>(@\S+?)<\/a>/) { $1 } raw.gsub!(/<a.*>(@\S+?)<\/a>/) { $1 }
raw = ReverseMarkdown.convert(raw) raw = ReverseMarkdown.convert(raw) unless skip_reverse_markdown
raw.scrub! raw.scrub!