DEV: minor improvements in the vanilla import script.

We now parse each post's raw body according to the record's format.
This commit is contained in:
Vinoth Kannan 2021-08-12 15:07:44 +05:30 committed by GitHub
parent aed65ec16d
commit cd9262b7d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 21 additions and 5 deletions
script/import_scripts

View File

@ -336,7 +336,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
user_id: user_id,
title: discussion['Name'],
category: category_id_from_imported_category_id(discussion['CategoryID']) || @category_mappings[discussion['CategoryID']].try(:[], :category_id),
raw: @vb_parser ? VanillaBodyParser.new(discussion, user_id).parse : process_raw(discussion['Body']),
raw: get_raw(discussion, user_id),
views: discussion['CountViews'] || 0,
closed: discussion['Closed'] == 1,
pinned_at: discussion['Announce'] == 0 ? nil : Time.zone.at(discussion['DateLastComment'] || discussion['DateInserted']),
@ -381,7 +381,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
id: "comment#" + comment['CommentID'].to_s,
user_id: user_id,
topic_id: t[:topic_id],
raw: @vb_parser ? VanillaBodyParser.new(comment, user_id).parse : process_raw(comment['Body']),
raw: get_raw(comment, user_id),
created_at: Time.zone.at(comment['DateInserted'])
}
@ -449,7 +449,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
create_posts(messages, total: total_count, offset: offset) do |message|
user_id = user_id_from_imported_user_id(message['InsertUserID']) || Discourse::SYSTEM_USER_ID
body = @vb_parser ? VanillaBodyParser.new(message, user_id).parse : process_raw(message['Body'])
body = get_raw(message, user_id)
common = {
user_id: user_id,
@ -486,14 +486,30 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
end
end
def process_raw(raw)
# Chooses how to convert an imported record's body, based on the record's
# 'Format' value (compared case-insensitively; a missing value counts as "").
#
# record  - row hash; 'Format' and 'Body' are read from it.
# user_id - handed to VanillaBodyParser when that path is taken.
#
# Returns the result of whichever converter was selected.
def get_raw(record, user_id)
  kind = record['Format']
  kind = "" unless kind
  kind = kind.downcase
  content = record['Body']

  # "rich" records always go through the body parser, regardless of @vb_parser.
  return VanillaBodyParser.new(record, user_id).parse if kind == "rich"

  return process_raw(content) if kind == "html"

  # Markdown bodies ask process_raw to skip its ReverseMarkdown conversion.
  return process_raw(content, skip_reverse_markdown: true) if kind == "markdown"

  # Any other (or missing) format: fall back to the @vb_parser switch.
  if @vb_parser
    VanillaBodyParser.new(record, user_id).parse
  else
    process_raw(content)
  end
end
def process_raw(raw, skip_reverse_markdown: false)
return if raw == nil
raw = @htmlentities.decode(raw)
# convert user profile links to user mentions
raw.gsub!(/<a.*>(@\S+?)<\/a>/) { $1 }
raw = ReverseMarkdown.convert(raw)
raw = ReverseMarkdown.convert(raw) unless skip_reverse_markdown
raw.scrub!