Improve Vanilla import script. (#11701)
- import groups and group users
- import uploads/attachments
- improved code tag parsing
- improved text formatting
- mark topics as solved
This commit is contained in:
parent
74b95c88ac
commit
bd7cbcd8f8
|
@ -24,6 +24,17 @@ class VanillaBodyParser
|
||||||
private
|
private
|
||||||
|
|
||||||
def clean_up(text)
|
def clean_up(text)
|
||||||
|
# <pre class="CodeBlock">...</pre>
|
||||||
|
text = text.gsub(/\<pre class="CodeBlock"\>(.*?)\<\/pre\>/im) { "\n```\n#{$1}\n```\n" }
|
||||||
|
# <pre>...</pre>
|
||||||
|
text = text.gsub(/\<pre\>(.*?)\<\/pre\>/im) { "\n```\n#{$1}\n```\n" }
|
||||||
|
# <code></code>
|
||||||
|
text = text.gsub("\<code\>\</code\>", "").gsub(/\<code\>(.*?)\<\/code\>/im) { "#{$1}" }
|
||||||
|
# <div class="Quote">...</div>
|
||||||
|
text = text.gsub(/\<div class="Quote"\>(.*?)\<\/div\>/im) { "\n[quote]\n#{$1}\n[/quote]\n" }
|
||||||
|
# [code], [quote]
|
||||||
|
text = text.gsub(/\[\/?code\]/i, "\n```\n").gsub(/\[quote.*?\]/i, "\n" + '\0' + "\n").gsub(/\[\/quote\]/i, "\n" + '\0' + "\n")
|
||||||
|
|
||||||
text.gsub(/<\/?font[^>]*>/, '').gsub(/<\/?span[^>]*>/, '').gsub(/<\/?div[^>]*>/, '').gsub(/^ +/, '').gsub(/ +/, ' ')
|
text.gsub(/<\/?font[^>]*>/, '').gsub(/<\/?span[^>]*>/, '').gsub(/<\/?div[^>]*>/, '').gsub(/^ +/, '').gsub(/ +/, ' ')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -58,7 +69,7 @@ class VanillaBodyParser
|
||||||
return parse_quote(insert) if quoting
|
return parse_quote(insert) if quoting
|
||||||
|
|
||||||
embed = embed_type.in? ['image', 'link', 'file']
|
embed = embed_type.in? ['image', 'link', 'file']
|
||||||
parse_embed(insert) if embed
|
parse_embed(insert, embed_type) if embed
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_mention(mention)
|
def parse_mention(mention)
|
||||||
|
@ -87,9 +98,6 @@ class VanillaBodyParser
|
||||||
|
|
||||||
# In the Quill format used by Vanilla Forums, a line is rendered as `code`
|
# In the Quill format used by Vanilla Forums, a line is rendered as `code`
|
||||||
# when it's followed by a fragment with attributes: {'code-block': true}.
|
# when it's followed by a fragment with attributes: {'code-block': true}.
|
||||||
# So we open our ``` block when the next fragment has a 'code-block'
|
|
||||||
# attribute and the previous one didn't and we close the ``` block when
|
|
||||||
# the second next fragment does not contain the 'code-block' attribute
|
|
||||||
def parse_code(text, fragment, index)
|
def parse_code(text, fragment, index)
|
||||||
next_fragment = next_fragment(index)
|
next_fragment = next_fragment(index)
|
||||||
|
|
||||||
|
@ -98,18 +106,27 @@ class VanillaBodyParser
|
||||||
previous_fragment = previous_fragment(index)
|
previous_fragment = previous_fragment(index)
|
||||||
previous_code = previous_fragment.dig(:attributes, :'code-block')
|
previous_code = previous_fragment.dig(:attributes, :'code-block')
|
||||||
|
|
||||||
# if next is code and previous is not, prepend ```
|
if previous_code
|
||||||
text = "\n```#{text}" unless previous_code
|
text = text.gsub(/\\n(.*?)\\n/) { "\n```\n#{$1}\n```\n" }
|
||||||
|
else
|
||||||
|
last_pos = text.rindex(/\n/)
|
||||||
|
|
||||||
|
if last_pos
|
||||||
|
array = [text[0..last_pos].strip, text[last_pos + 1 .. text.length].strip]
|
||||||
|
text = array.join("\n```\n")
|
||||||
|
else
|
||||||
|
text = "\n```\n#{text}"
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
current_code = fragment.dig(:attributes, :'code-block')
|
current_code = fragment.dig(:attributes, :'code-block')
|
||||||
|
|
||||||
if current_code
|
if current_code
|
||||||
second_next_fragment = second_next_fragment(index)
|
second_next_fragment = second_next_fragment(index)
|
||||||
second_next_code = second_next_fragment.dig(:attributes, :'code-block')
|
second_next_code = second_next_fragment.dig(:attributes, :'code-block')
|
||||||
|
|
||||||
# if current is code and 2 after is not, prepend ```
|
# if current is code and 2 after is not, prepend ```
|
||||||
text = "\n```#{text}" unless second_next_code
|
text = "\n```\n#{text}" unless second_next_code
|
||||||
end
|
end
|
||||||
|
|
||||||
text
|
text
|
||||||
|
@ -174,7 +191,7 @@ class VanillaBodyParser
|
||||||
"[quote#{quote_info}]\n#{embed[:body]}\n[/quote]\n\n"""
|
"[quote#{quote_info}]\n#{embed[:body]}\n[/quote]\n\n"""
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_embed(insert)
|
def parse_embed(insert, embed_type)
|
||||||
embed = insert.dig(:'embed-external', :data)
|
embed = insert.dig(:'embed-external', :data)
|
||||||
|
|
||||||
url = embed[:url]
|
url = embed[:url]
|
||||||
|
@ -193,7 +210,13 @@ class VanillaBodyParser
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if embed_type == "link"
|
||||||
"\n[#{embed[:name]}](#{url})\n"
|
"\n[#{embed[:name]}](#{url})\n"
|
||||||
|
elsif embed_type == "image"
|
||||||
|
"\n<img src=\"#{url}\" alt=\"#{embed[:name]}\">\n"
|
||||||
|
else
|
||||||
|
"\n<a href=\"#{url}\">#{embed[:name]}</a>\n"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def normalize(full_text)
|
def normalize(full_text)
|
||||||
|
|
|
@ -45,16 +45,37 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
|
||||||
SiteSetting.max_tags_per_topic = 10
|
SiteSetting.max_tags_per_topic = 10
|
||||||
end
|
end
|
||||||
|
|
||||||
|
import_groups
|
||||||
import_users
|
import_users
|
||||||
import_avatars
|
import_avatars
|
||||||
|
import_group_users
|
||||||
import_categories
|
import_categories
|
||||||
import_topics
|
import_topics
|
||||||
import_posts
|
import_posts
|
||||||
import_messages
|
import_messages
|
||||||
|
|
||||||
update_tl0
|
update_tl0
|
||||||
|
mark_topics_as_solved
|
||||||
|
|
||||||
create_permalinks
|
create_permalinks
|
||||||
|
import_attachments
|
||||||
|
end
|
||||||
|
|
||||||
|
def import_groups
|
||||||
|
puts "", "importing groups..."
|
||||||
|
|
||||||
|
groups = mysql_query <<-SQL
|
||||||
|
SELECT RoleID, Name
|
||||||
|
FROM #{TABLE_PREFIX}Role
|
||||||
|
ORDER BY RoleID
|
||||||
|
SQL
|
||||||
|
|
||||||
|
create_groups(groups) do |group|
|
||||||
|
{
|
||||||
|
id: group["RoleID"],
|
||||||
|
name: @htmlentities.decode(group["Name"]).strip
|
||||||
|
}
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def import_users
|
def import_users
|
||||||
|
@ -147,7 +168,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
|
||||||
|
|
||||||
photo_real_filename = nil
|
photo_real_filename = nil
|
||||||
parts = photo.squeeze("/").split("/")
|
parts = photo.squeeze("/").split("/")
|
||||||
if parts[0] == "cf:"
|
if parts[0] =~ /^[a-z0-9]{2}:/
|
||||||
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
|
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
|
||||||
elsif parts[0] == "~cf"
|
elsif parts[0] == "~cf"
|
||||||
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
|
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
|
||||||
|
@ -200,6 +221,24 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
|
||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def import_group_users
|
||||||
|
puts "", "importing group users..."
|
||||||
|
|
||||||
|
group_users = mysql_query("
|
||||||
|
SELECT RoleID, UserID
|
||||||
|
FROM #{TABLE_PREFIX}UserRole
|
||||||
|
").to_a
|
||||||
|
|
||||||
|
group_users.each do |row|
|
||||||
|
user_id = user_id_from_imported_user_id(row["UserID"])
|
||||||
|
group_id = group_id_from_imported_group_id(row["RoleID"])
|
||||||
|
|
||||||
|
if user_id && group_id
|
||||||
|
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def import_categories
|
def import_categories
|
||||||
puts "", "importing categories..."
|
puts "", "importing categories..."
|
||||||
|
|
||||||
|
@ -272,7 +311,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
|
||||||
batches(BATCH_SIZE) do |offset|
|
batches(BATCH_SIZE) do |offset|
|
||||||
comments = mysql_query(
|
comments = mysql_query(
|
||||||
"SELECT CommentID, DiscussionID, Body, Format,
|
"SELECT CommentID, DiscussionID, Body, Format,
|
||||||
DateInserted, InsertUserID
|
DateInserted, InsertUserID, QnA
|
||||||
FROM #{TABLE_PREFIX}Comment
|
FROM #{TABLE_PREFIX}Comment
|
||||||
WHERE CommentID > #{@last_post_id}
|
WHERE CommentID > #{@last_post_id}
|
||||||
ORDER BY CommentID ASC
|
ORDER BY CommentID ASC
|
||||||
|
@ -286,13 +325,20 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
|
||||||
next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)
|
next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)
|
||||||
next if comment['Body'].blank?
|
next if comment['Body'].blank?
|
||||||
user_id = user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID
|
user_id = user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID
|
||||||
{
|
|
||||||
|
mapped = {
|
||||||
id: "comment#" + comment['CommentID'].to_s,
|
id: "comment#" + comment['CommentID'].to_s,
|
||||||
user_id: user_id,
|
user_id: user_id,
|
||||||
topic_id: t[:topic_id],
|
topic_id: t[:topic_id],
|
||||||
raw: VanillaBodyParser.new(comment, user_id).parse,
|
raw: VanillaBodyParser.new(comment, user_id).parse,
|
||||||
created_at: Time.zone.at(comment['DateInserted'])
|
created_at: Time.zone.at(comment['DateInserted'])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if comment['QnA'] == "Accepted"
|
||||||
|
mapped[:custom_fields] = { is_accepted_answer: "true" }
|
||||||
|
end
|
||||||
|
|
||||||
|
mapped
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -395,6 +441,104 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def import_attachments
|
||||||
|
if ATTACHMENTS_BASE_DIR && File.exists?(ATTACHMENTS_BASE_DIR)
|
||||||
|
puts "", "importing attachments"
|
||||||
|
|
||||||
|
start = Time.now
|
||||||
|
count = 0
|
||||||
|
|
||||||
|
# https://us.v-cdn.net/1234567/uploads/editor/xyz/image.jpg
|
||||||
|
cdn_regex = /https:\/\/us.v-cdn.net\/1234567\/uploads\/(\S+\/(\w|-)+.\w+)/i
|
||||||
|
# [attachment=10109:Screen Shot 2012-04-01 at 3.47.35 AM.png]
|
||||||
|
attachment_regex = /\[attachment=(\d+):(.*?)\]/i
|
||||||
|
|
||||||
|
Post.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'").find_each do |post|
|
||||||
|
count += 1
|
||||||
|
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
|
||||||
|
new_raw = post.raw.dup
|
||||||
|
|
||||||
|
new_raw.gsub!(attachment_regex) do |s|
|
||||||
|
matches = attachment_regex.match(s)
|
||||||
|
attachment_id = matches[1]
|
||||||
|
file_name = matches[2]
|
||||||
|
next unless attachment_id
|
||||||
|
|
||||||
|
r = mysql_query("SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};").first
|
||||||
|
next if r.nil?
|
||||||
|
path = r["Path"]
|
||||||
|
name = r["Name"]
|
||||||
|
next unless path.present?
|
||||||
|
|
||||||
|
path.gsub!("s3://content/", "")
|
||||||
|
path.gsub!("s3://uploads/", "")
|
||||||
|
file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}"
|
||||||
|
|
||||||
|
if File.exists?(file_path)
|
||||||
|
upload = create_upload(post.user.id, file_path, File.basename(file_path))
|
||||||
|
if upload && upload.errors.empty?
|
||||||
|
# upload.url
|
||||||
|
filename = name || file_name || File.basename(file_path)
|
||||||
|
html_for_upload(upload, normalize_text(filename))
|
||||||
|
else
|
||||||
|
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
|
||||||
|
end
|
||||||
|
else
|
||||||
|
puts "Couldn't find file for #{attachment_id}. Skipping."
|
||||||
|
next
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
new_raw.gsub!(cdn_regex) do |s|
|
||||||
|
matches = cdn_regex.match(s)
|
||||||
|
attachment_id = matches[1]
|
||||||
|
|
||||||
|
file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}"
|
||||||
|
|
||||||
|
if File.exists?(file_path)
|
||||||
|
upload = create_upload(post.user.id, file_path, File.basename(file_path))
|
||||||
|
if upload && upload.errors.empty?
|
||||||
|
upload.url
|
||||||
|
else
|
||||||
|
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
|
||||||
|
end
|
||||||
|
else
|
||||||
|
puts "Couldn't find file for #{attachment_id}. Skipping."
|
||||||
|
next
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
if new_raw != post.raw
|
||||||
|
begin
|
||||||
|
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, skip_revision: true, skip_validations: true, bypass_bump: true)
|
||||||
|
rescue
|
||||||
|
puts "PostRevisor error for #{post.id}"
|
||||||
|
post.raw = new_raw
|
||||||
|
post.save(validate: false)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def mark_topics_as_solved
|
||||||
|
puts "", "Marking topics as solved..."
|
||||||
|
|
||||||
|
DB.exec <<~SQL
|
||||||
|
INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
|
||||||
|
SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
|
||||||
|
FROM post_custom_fields pcf
|
||||||
|
JOIN posts p ON p.id = pcf.post_id
|
||||||
|
WHERE pcf.name = 'is_accepted_answer' AND pcf.value = 'true'
|
||||||
|
AND NOT EXISTS (
|
||||||
|
SELECT 1
|
||||||
|
FROM topic_custom_fields x
|
||||||
|
WHERE x.topic_id = p.topic_id AND x.name = 'accepted_answer_post_id'
|
||||||
|
)
|
||||||
|
ON CONFLICT DO NOTHING
|
||||||
|
SQL
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
ImportScripts::VanillaSQL.new.perform
|
ImportScripts::VanillaSQL.new.perform
|
||||||
|
|
|
@ -90,7 +90,7 @@ this starts with spaces but IS NOT a quote'''
|
||||||
|
|
||||||
it 'keeps uploaded files as links' do
|
it 'keeps uploaded files as links' do
|
||||||
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:upload_file].to_json }, user_id).parse
|
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:upload_file].to_json }, user_id).parse
|
||||||
expect(parsed).to eq "This is a PDF I've uploaded:\n\n[original_name_of_file.pdf](https:\/\/vanilla.sampleforum.org\/uploads\/393\/5QR3BX57K7HM.pdf)"
|
expect(parsed).to eq "This is a PDF I've uploaded:\n\n<a href=\"https://vanilla.sampleforum.org/uploads/393/5QR3BX57K7HM.pdf\">original_name_of_file.pdf</a>"
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'supports complex formatting' do
|
it 'supports complex formatting' do
|
||||||
|
@ -100,7 +100,7 @@ this starts with spaces but IS NOT a quote'''
|
||||||
|
|
||||||
it 'support code blocks' do
|
it 'support code blocks' do
|
||||||
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:code_block].to_json }, user_id).parse
|
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:code_block].to_json }, user_id).parse
|
||||||
expect(parsed).to eq "Here's a monospaced block:\n\n```this line should be monospaced\nthis one too, with extra spaces#{' ' * 4}\n```\n\nbut not this one"
|
expect(parsed).to eq "Here's a monospaced block:\n\n```\nthis line should be monospaced\nthis one too, with extra spaces#{' ' * 4}\n```\n\nbut not this one"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue