FIX: improve Vanilla importing (#10478)

* ensure emails don't have spaces
* import banned users as suspended for 1k yrs
* upgrade users to TL2 if they have comments
* topic: import views, closed and pinned info
* import messages
* encode vanilla usernames for permalinks. Vanilla usernames can contain spaces and special characters.
* parse Vanilla's new rich body format
This commit is contained in:
Rachel Carvalho 2020-08-24 16:19:57 -04:00 committed by GitHub
parent 1959745c2c
commit 812e0d6b5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 585 additions and 87 deletions

View File

@ -0,0 +1,211 @@
# frozen_string_literal: true
# Converts the `Body` of a Vanilla Forums row (discussion, comment or
# conversation message) into raw text for a Discourse post.
#
# Rows whose `Format` is 'Rich' hold a JSON array of Quill-style fragments
# (`insert` plus optional `attributes`); every other format is treated as
# HTML/plain text and only lightly cleaned up.
#
# Call VanillaBodyParser.configure once before parsing; the collaborators are
# shared by every parser instance.
class VanillaBodyParser
  # Presentational tags removed from non-rich bodies, applied in this order.
  STRIPPED_TAGS = [%r{</?font[^>]*>}, %r{</?span[^>]*>}, %r{</?div[^>]*>}].freeze
  # Embed types rendered as an upload or a plain link.
  EMBED_TYPES = %w[image link file].freeze
  # One fenced block: opening fence, at least one line, closing fence.
  # Non-greedy so that each block in a post is matched on its own.
  CODE_BLOCK = /```(?:.*\n)+?```/
  # Placeholder that stands in for a fenced block during normalization.
  CODE_PLACEHOLDER = /\{\{\{CODE_BLOCK_(\d+)\}\}\}/
  # A line holding only whitespace (including Unicode spaces).
  BLANK_LINE = /\A[[:space:]]*\z/
  private_constant :STRIPPED_TAGS, :EMBED_TYPES, :CODE_BLOCK, :CODE_PLACEHOLDER, :BLANK_LINE

  # Stores the collaborators used while parsing.
  #
  # @param lookup       object answering `user_id_from_imported_user_id` and
  #                     `topic_lookup_from_imported_post_id`
  # @param uploader     object answering `create_upload` and `html_for_upload`
  # @param host         domain of the Vanilla forum, used to spot local uploads
  # @param uploads_path local folder that mirrors the forum's `/uploads/`
  def self.configure(lookup:, uploader:, host:, uploads_path:)
    @@lookup = lookup
    @@uploader = uploader
    @@host = host
    @@uploads_path = uploads_path
  end

  # @param row     hash-like row with 'Body' and 'Format' keys
  # @param user_id id of the user who will own any upload created
  def initialize(row, user_id)
    @row = row
    @user_id = user_id
  end

  # @return [String] the converted body; '' when the row has no body at all
  def parse
    body = @row['Body']
    return '' if body.nil?
    return clean_up(body) unless rich?

    full_text = json.each_with_index.map { |fragment, index| parse_fragment(fragment, index) }.join
    normalize(full_text)
  end

  private

  # Removes font/span/div tags, strips leading spaces from every line and
  # collapses runs of spaces into one.
  def clean_up(text)
    without_tags = STRIPPED_TAGS.reduce(text.to_s) { |acc, tag| acc.gsub(tag, '') }
    without_tags.gsub(/^ +/, '').gsub(/ +/, ' ')
  end

  def rich?
    @row['Format'] == 'Rich'
  end

  # Parsed fragments with symbolized keys at every depth; nil for non-rich rows.
  def json
    return nil unless rich?

    @json ||= JSON.parse(@row['Body'], symbolize_names: true)
  end

  # Converts one fragment, then adds the code/list markup that depends on the
  # fragments around it.
  def parse_fragment(fragment, index)
    plain = fragment.keys.one? && fragment[:insert].is_a?(String)
    # rich_parse may yield a non-string insert; coerce so the list markup
    # below can always be concatenated.
    text = plain ? fragment[:insert] : rich_parse(fragment).to_s

    text = parse_code(text, fragment, index)
    parse_list(text, fragment, index)
  end

  # Handles mentions, inline formatting, quotes and embeds. Plain text carried
  # by a fragment with extra keys is passed through; unsupported inserts
  # produce an empty string.
  def rich_parse(fragment)
    insert = fragment[:insert]
    structured = insert.is_a?(Hash)

    return parse_mention(insert[:mention]) if structured && insert.dig(:mention, :userID)
    return parse_formatting(fragment) if fragment[:attributes]
    return insert.to_s unless structured

    embed_type = insert.dig(:'embed-external', :data, :embedType)
    return parse_quote(insert) if embed_type == 'quote'
    return parse_embed(insert) if EMBED_TYPES.include?(embed_type)

    ''
  end

  # Uses the imported user's username when known, otherwise the Vanilla name.
  def parse_mention(mention)
    user = user_from_imported_id(mention[:userID])
    username = user&.username || mention[:name]
    "@#{username}"
  end

  # Returns the imported User, or nil when the id is unknown or the mapped
  # user no longer exists.
  def user_from_imported_id(imported_id)
    user_id = @@lookup.user_id_from_imported_user_id(imported_id)
    User.find_by(id: user_id) if user_id
  end

  # Wraps the insert in <a>, <i> and <b> tags (innermost first) according to
  # the fragment's attributes.
  def parse_formatting(fragment)
    attributes = fragment[:attributes]
    text = fragment[:insert]

    text = "<a href=\"#{attributes[:link]}\">#{text}</a>" if attributes[:link]
    text = "<i>#{text}</i>" if attributes[:italic]
    text = "<b>#{text}</b>" if attributes[:bold]
    text
  end

  # In the Quill format used by Vanilla Forums, a line is rendered as `code`
  # when it's followed by a fragment with attributes: {'code-block': true}.
  # So we open our ``` block when the next fragment has a 'code-block'
  # attribute and the previous one didn't, and we close the ``` block when
  # the second next fragment does not contain the 'code-block' attribute.
  def parse_code(text, fragment, index)
    if next_fragment(index).dig(:attributes, :'code-block')
      previous_code = previous_fragment(index).dig(:attributes, :'code-block')
      # next is code and previous is not: open the fence before this line
      text = "\n```#{text}" unless previous_code
    end

    if fragment.dig(:attributes, :'code-block')
      second_next_code = second_next_fragment(index).dig(:attributes, :'code-block')
      # current is the last code marker: put the closing fence before its newline
      text = "\n```#{text}" unless second_next_code
    end

    text
  end

  # Same look-around scheme as parse_code, emitting <ol>/<ul> and <li> tags.
  # A line is a list item when the fragment after it carries a list attribute.
  def parse_list(text, fragment, index)
    next_list = next_fragment(index).dig(:attributes, :list, :type)
    if next_list
      text = "<li>#{text}"
      previous_list = previous_fragment(index).dig(:attributes, :list, :type)
      opening = next_list == 'ordered' ? '<ol>' : '<ul>'
      # first item of the list: open the list before it
      text = "\n#{opening}\n#{text}" unless previous_list
    end

    current_list = fragment.dig(:attributes, :list, :type)
    if current_list
      closings = '</li>'
      second_next_list = second_next_fragment(index).dig(:attributes, :list, :type)
      closing = current_list == 'ordered' ? '</ol>' : '</ul>'
      # last item of the list: close the list right after the item
      closings = "#{closings}\n#{closing}" unless second_next_list
      text = closings + text
    end

    text
  end

  def next_fragment(index)
    json[index + 1] || {}
  end

  # The first fragment has no predecessor; without this guard `json[-1]`
  # would silently return the LAST fragment of the body.
  def previous_fragment(index)
    return {} unless index.positive?

    json[index - 1] || {}
  end

  def second_next_fragment(index)
    json[index + 2] || {}
  end

  # Builds a [quote] block, attributed to the imported post and user when both
  # can be resolved.
  def parse_quote(insert)
    embed = insert.dig(:'embed-external', :data)

    import_post_id = "#{embed[:recordType]}##{embed[:recordID]}"
    topic = @@lookup.topic_lookup_from_imported_post_id(import_post_id)
    user = user_from_imported_id(embed.dig(:insertUser, :userID))

    quote_info = topic && user ? "=\"#{user.username}, post: #{topic[:post_number]}, topic: #{topic[:topic_id]}\"" : ''

    "[quote#{quote_info}]\n#{embed[:body]}\n[/quote]\n\n"
  end

  # Files hosted under the configured forum's /uploads/ are uploaded from the
  # local uploads folder; anything else, or a failed upload, becomes a
  # markdown link.
  def parse_embed(insert)
    embed = insert.dig(:'embed-external', :data)
    url = embed[:url]

    # The host is escaped so that its dots only match literal dots.
    if %r{https?://#{Regexp.escape(@@host.to_s)}/uploads/.*}.match?(url)
      remote_path = url[%r{uploads/(.*)}, 1]
      path = File.join(@@uploads_path, remote_path)
      upload = @@uploader.create_upload(@user_id, path, embed[:name])

      if upload&.persisted?
        return "\n" + @@uploader.html_for_upload(upload, embed[:name]) + "\n"
      else
        puts "Failed to upload #{path}"
        puts upload.errors.full_messages.join(', ') if upload
      end
    end

    "\n[#{embed[:name]}](#{url})\n"
  end

  # Separates paragraphs with blank lines while leaving every fenced code
  # block untouched: blocks are swapped for placeholders, the text around
  # them is normalized, then the blocks are put back verbatim.
  def normalize(full_text)
    code_blocks = []
    shielded = full_text.gsub(CODE_BLOCK) do |block|
      code_blocks << block
      "{{{CODE_BLOCK_#{code_blocks.size - 1}}}}"
    end

    double_new_lines(shielded).gsub(CODE_PLACEHOLDER) do |placeholder|
      code_blocks.fetch(Regexp.last_match(1).to_i, placeholder)
    end
  end

  # Strips every line, drops blank ones and joins the rest with empty lines.
  def double_new_lines(text)
    text.split("\n").map(&:strip).reject { |line| BLANK_LINE.match?(line) }.join("\n\n")
  end
end

View File

@ -3,6 +3,7 @@
require "mysql2" require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb") require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities' require 'htmlentities'
require_relative 'vanilla_body_parser'
class ImportScripts::VanillaSQL < ImportScripts::Base class ImportScripts::VanillaSQL < ImportScripts::Base
@ -22,6 +23,13 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
database: VANILLA_DB database: VANILLA_DB
) )
VanillaBodyParser.configure(
lookup: @lookup,
uploader: @uploader,
host: 'vanilla.yourforum.com', # your Vanilla forum domain
uploads_path: 'uploads' # relative path to your vanilla uploads folder
)
@import_tags = false @import_tags = false
begin begin
r = @client.query("select count(*) count from #{TABLE_PREFIX}Tag where countdiscussions > 0") r = @client.query("select count(*) count from #{TABLE_PREFIX}Tag where countdiscussions > 0")
@ -42,6 +50,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
import_categories import_categories
import_topics import_topics
import_posts import_posts
import_messages
update_tl0 update_tl0
@ -59,8 +68,8 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
results = mysql_query( results = mysql_query(
"SELECT UserID, Name, Title, Location, About, Email, "SELECT UserID, Name, Title, Location, About, Email, Admin, Banned, CountComments,
DateInserted, DateLastActive, InsertIPAddress, Admin DateInserted, DateLastActive, InsertIPAddress
FROM #{TABLE_PREFIX}User FROM #{TABLE_PREFIX}User
WHERE UserID > #{@last_user_id} WHERE UserID > #{@last_user_id}
ORDER BY UserID ASC ORDER BY UserID ASC
@ -71,7 +80,9 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
next if all_records_exist? :users, results.map { |u| u['UserID'].to_i } next if all_records_exist? :users, results.map { |u| u['UserID'].to_i }
create_users(results, total: total_count, offset: offset) do |user| create_users(results, total: total_count, offset: offset) do |user|
next if user['Email'].blank? email = user['Email'].squish
next if email.blank?
next if user['Name'].blank? next if user['Name'].blank?
next if @lookup.user_id_from_imported_user_id(user['UserID']) next if @lookup.user_id_from_imported_user_id(user['UserID'])
if user['Name'] == '[Deleted User]' if user['Name'] == '[Deleted User]'
@ -84,8 +95,11 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
username = user['Name'] username = user['Name']
end end
banned = user['Banned'] != 0
commented = (user['CountComments'] || 0) > 0
{ id: user['UserID'], { id: user['UserID'],
email: user['Email'], email: email,
username: username, username: username,
name: user['Name'], name: user['Name'],
created_at: user['DateInserted'] == nil ? 0 : Time.zone.at(user['DateInserted']), created_at: user['DateInserted'] == nil ? 0 : Time.zone.at(user['DateInserted']),
@ -94,10 +108,21 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
last_seen_at: user['DateLastActive'] == nil ? 0 : Time.zone.at(user['DateLastActive']), last_seen_at: user['DateLastActive'] == nil ? 0 : Time.zone.at(user['DateLastActive']),
location: user['Location'], location: user['Location'],
admin: user['Admin'] == 1, admin: user['Admin'] == 1,
trust_level: !banned && commented ? 2 : 0,
post_create_action: proc do |newuser| post_create_action: proc do |newuser|
if @user_is_deleted if @user_is_deleted
@last_deleted_username = newuser.username @last_deleted_username = newuser.username
end end
if banned
newuser.suspended_at = Time.now
# banning on Vanilla doesn't have an end, so a thousand years seems equivalent
newuser.suspended_till = 1000.years.from_now
if newuser.save
StaffActionLogger.new(Discourse.system_user).log_user_suspend(newuser, 'Imported from Vanilla Forum')
else
puts "Failed to suspend user #{newuser.username}. #{newuser.errors.full_messages.join(', ')}"
end
end
end } end }
end end
end end
@ -204,8 +229,8 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
discussions = mysql_query( discussions = mysql_query(
"SELECT DiscussionID, CategoryID, Name, Body, "SELECT DiscussionID, CategoryID, Name, Body, Format, CountViews, Closed, Announce,
DateInserted, InsertUserID DateInserted, InsertUserID, DateLastComment
FROM #{TABLE_PREFIX}Discussion FROM #{TABLE_PREFIX}Discussion
WHERE DiscussionID > #{@last_topic_id} WHERE DiscussionID > #{@last_topic_id}
ORDER BY DiscussionID ASC ORDER BY DiscussionID ASC
@ -216,12 +241,17 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s } next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s }
create_posts(discussions, total: total_count, offset: offset) do |discussion| create_posts(discussions, total: total_count, offset: offset) do |discussion|
user_id = user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID
{ {
id: "discussion#" + discussion['DiscussionID'].to_s, id: "discussion#" + discussion['DiscussionID'].to_s,
user_id: user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID, user_id: user_id,
title: discussion['Name'], title: discussion['Name'],
category: category_id_from_imported_category_id(discussion['CategoryID']), category: category_id_from_imported_category_id(discussion['CategoryID']),
raw: clean_up(discussion['Body']), raw: VanillaBodyParser.new(discussion, user_id).parse,
views: discussion['CountViews'] || 0,
closed: discussion['Closed'] == 1,
pinned_at: discussion['Announce'] == 0 ? nil : Time.zone.at(discussion['DateLastComment'] || discussion['DateInserted']),
pinned_globally: discussion['Announce'] == 1,
created_at: Time.zone.at(discussion['DateInserted']), created_at: Time.zone.at(discussion['DateInserted']),
post_create_action: proc do |post| post_create_action: proc do |post|
if @import_tags if @import_tags
@ -241,7 +271,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
@last_post_id = -1 @last_post_id = -1
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
comments = mysql_query( comments = mysql_query(
"SELECT CommentID, DiscussionID, Body, "SELECT CommentID, DiscussionID, Body, Format,
DateInserted, InsertUserID DateInserted, InsertUserID
FROM #{TABLE_PREFIX}Comment FROM #{TABLE_PREFIX}Comment
WHERE CommentID > #{@last_post_id} WHERE CommentID > #{@last_post_id}
@ -255,102 +285,77 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
create_posts(comments, total: total_count, offset: offset) do |comment| create_posts(comments, total: total_count, offset: offset) do |comment|
next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s) next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)
next if comment['Body'].blank? next if comment['Body'].blank?
user_id = user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID
{ {
id: "comment#" + comment['CommentID'].to_s, id: "comment#" + comment['CommentID'].to_s,
user_id: user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID, user_id: user_id,
topic_id: t[:topic_id], topic_id: t[:topic_id],
raw: clean_up(comment['Body']), raw: VanillaBodyParser.new(comment, user_id).parse,
created_at: Time.zone.at(comment['DateInserted']) created_at: Time.zone.at(comment['DateInserted'])
} }
end end
end end
end end
def clean_up(raw) def import_messages
return "" if raw.blank? puts "", "importing messages..."
# decode HTML entities total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}ConversationMessage;").first['count']
raw = @htmlentities.decode(raw)
# fix whitespaces @last_message_id = -1
raw = raw.gsub(/(\\r)?\\n/, "\n")
.gsub("\\t", "\t")
# [HTML]...[/HTML] batches(BATCH_SIZE) do |offset|
raw = raw.gsub(/\[html\]/i, "\n```html\n") messages = mysql_query(
.gsub(/\[\/html\]/i, "\n```\n") "SELECT m.MessageID, m.Body, m.Format,
m.InsertUserID, m.DateInserted,
m.ConversationID, c.Contributors
FROM #{TABLE_PREFIX}ConversationMessage m
INNER JOIN #{TABLE_PREFIX}Conversation c on c.ConversationID = m.ConversationID
WHERE m.MessageID > #{@last_message_id}
ORDER BY m.MessageID ASC
LIMIT #{BATCH_SIZE};")
# [PHP]...[/PHP] break if messages.size < 1
raw = raw.gsub(/\[php\]/i, "\n```php\n") @last_message_id = messages.to_a.last['MessageID']
.gsub(/\[\/php\]/i, "\n```\n") next if all_records_exist? :posts, messages.map { |t| "message#" + t['MessageID'].to_s }
# [HIGHLIGHT="..."] create_posts(messages, total: total_count, offset: offset) do |message|
raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" } user_id = user_id_from_imported_user_id(message['InsertUserID']) || Discourse::SYSTEM_USER_ID
body = VanillaBodyParser.new(message, user_id).parse
# [CODE]...[/CODE] common = {
# [HIGHLIGHT]...[/HIGHLIGHT] user_id: user_id,
raw = raw.gsub(/\[\/?code\]/i, "\n```\n") raw: body,
.gsub(/\[\/?highlight\]/i, "\n```\n") created_at: Time.zone.at(message['DateInserted']),
custom_fields: {
conversation_id: message['ConversationID'],
participants: message['Contributors'],
message_id: message['MessageID']
}
}
# [SAMP]...[/SAMP] conversation_id = "conversation#" + message['ConversationID'].to_s
raw.gsub!(/\[\/?samp\]/i, "`") message_id = "message#" + message['MessageID'].to_s
unless CONVERT_HTML imported_conversation = topic_lookup_from_imported_post_id(conversation_id)
# replace all chevrons with HTML entities
# NOTE: must be done
# - AFTER all the "code" processing
# - BEFORE the "quote" processing
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
.gsub("<", "&lt;")
.gsub("\u2603", "<")
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" } if imported_conversation.present?
.gsub(">", "&gt;") common.merge(id: message_id, topic_id: imported_conversation[:topic_id])
.gsub("\u2603", ">") else
user_ids = (message['Contributors'] || '').scan(/\"(\d+)\"/).flatten.map(&:to_i)
usernames = user_ids.map { |id| @lookup.find_user_by_import_id(id).try(:username) }.compact
usernames = [@lookup.find_user_by_import_id(message['InsertUserID']).try(:username)].compact if usernames.empty?
title = body.truncate(40)
{
id: conversation_id,
title: title,
archetype: Archetype.private_message,
target_usernames: usernames.uniq,
}.merge(common)
end
end
end end
# [URL=...]...[/URL]
raw.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }
# [IMG]...[/IMG]
raw.gsub!(/\[\/?img\]/i, "")
# [URL]...[/URL]
# [MP3]...[/MP3]
raw = raw.gsub(/\[\/?url\]/i, "")
.gsub(/\[\/?mp3\]/i, "")
# [QUOTE]...[/QUOTE]
raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }
# [YOUTUBE]<id>[/YOUTUBE]
raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
# [youtube=425,350]id[/youtube]
raw.gsub!(/\[youtube="?(.+?)"?\](.+)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" }
# [MEDIA=youtube]id[/MEDIA]
raw.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
# [VIDEO=youtube;<id>]...[/VIDEO]
raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
# Convert image bbcode
raw.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')
# Remove the color tag
raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
raw.gsub!(/\[\/color\]/i, "")
# remove attachments
raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")
# sanitize img tags
# This regexp removes everything between the first and last img tag. The .* is too much.
# If it's needed, it needs to be fixed.
# raw.gsub!(/\<img.*src\="([^\"]+)\".*\>/i) {"\n<img src='#{$1}'>\n"}
raw
end end
def staff_guardian def staff_guardian
@ -368,7 +373,8 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
User.find_each do |u| User.find_each do |u|
ucf = u.custom_fields ucf = u.custom_fields
if ucf && ucf["import_id"] && ucf["import_username"] if ucf && ucf["import_id"] && ucf["import_username"]
Permalink.create(url: "profile/#{ucf['import_id']}/#{ucf['import_username']}", external_url: "/users/#{u.username}") rescue nil encoded_username = CGI.escape(ucf['import_username']).gsub('+', '%20')
Permalink.create(url: "profile/#{ucf['import_id']}/#{encoded_username}", external_url: "/users/#{u.username}") rescue nil
print '.' print '.'
end end
end end

View File

@ -0,0 +1,5 @@
%PDF-1.
1 0 obj<</Pages 2 0 R>>endobj
2 0 obj<</Kids[3 0 R]/Count 1>>endobj
3 0 obj<</Parent 2 0 R>>endobj
trailer <</Root 1 0 R>>

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

@ -0,0 +1,170 @@
{
"text": [{
"insert": "This is a message.\nAnd a second line.\n"
}],
"mention": [
{ "insert": { "mention": { "name": "Gandalf The Grey", "userID": 666 } } },
{
"insert": ", what do you think?\n"
}
],
"links": [
{
"insert": "We can link to the "
},
{
"attributes": { "link": "https:\/\/www.discourse.org\/" },
"insert": "Discourse home page"
},
{
"insert": " and it works."
}
],
"quote": [
{
"insert": {
"embed-external": {
"data": {
"recordID": 12345,
"recordType": "discussion",
"body": "This is the full<br \/>body<br \/>of the quoted discussion.<br \/>",
"bodyRaw": "This is the full\r\nbody\r\nof the quoted discussion.\r\n",
"format": "Html",
"dateInserted": "2020-04-27T15:32:12+00:00",
"insertUser": {
"userID": 34567,
"name": "Saruman",
"photoUrl": "\/\/w1.vanillicon.com\/v2\/117aa212e39e806eed30886df18fe9bc.svg",
"dateLastActive": "2020-05-03T01:08:18+00:00"
},
"displayOptions": {
"showUserLabel": false,
"showCompactUserInfo": true,
"showDiscussionLink": true,
"showPostLink": true,
"showCategoryLink": false,
"renderFullContent": false,
"expandByDefault": false
},
"url": "https:\/\/vanilla.sampleforum.org\/discussion\/12345\/the-discussion-title",
"embedType": "quote",
"name": "The discussion title"
},
"loaderData": {
"type": "link",
"link": "https:\/\/vanilla.sampleforum.org\/discussion\/12345\/the-discussion-title"
}
}
}
},
{ "insert": "When did this happen?\n" }
],
"image": [
{
"insert": "Here's the screenshot:\n"
},
{
"insert": {
"embed-external": {
"data": {
"url": "https:\/\/vanilla.sampleforum.org\/uploads\/569\/ZSM5T09U03WE.png",
"name": "Screen Shot 2020-05-26 at 7.09.06 AM.png",
"type": "image\/png",
"size": 94050,
"width": 1214,
"height": 886,
"mediaID": 22,
"dateInserted": "2020-05-26T11:11:29+00:00",
"insertUserID": 12345,
"foreignType": "embed",
"foreignID": 12345,
"embedType": "image",
"format": null,
"bodyRaw": null
},
"loaderData": { "type": "image" }
}
}
},
{ "insert": "\n" }
],
"embed_link": [
{
"insert": "Does anyone know this website?\n"
},
{
"insert": {
"embed-external": {
"data": {
"body": "A preview text of the url fetched by Vanilla Forum.",
"photoUrl": "https:\/\/someurl.com\/an_image.jpeg",
"url": "https:\/\/someurl.com\/long\/path\/here_and_there\/?fdkmlgm",
"embedType": "link",
"name": "Title of the page being linked",
"format": null,
"bodyRaw": null
},
"loaderData": {
"type": "link",
"link": "https:\/\/someurl.com\/long\/path\/here_and_there\/?fdkmlgm"
}
}
}
},
{ "insert": "\n\n" }
],
"upload_file": [
{
"insert": "This is a PDF I've uploaded:\n"
},
{
"insert": {
"embed-external": {
"data": {
"url": "https:\/\/vanilla.sampleforum.org\/uploads\/393\/5QR3BX57K7HM.pdf",
"name": "original_name_of_file.pdf",
"type": "application\/pdf",
"size": 501287,
"mediaID": 9,
"dateInserted": "2020-05-21T17:58:07+00:00",
"insertUserID": 12345,
"foreignType": "embed",
"foreignID": 12345,
"embedType": "file",
"format": null,
"bodyRaw": null
},
"loaderData": {
"type": "file",
"file": [],
"progressEventEmitter": {
"listeners": [null]
}
}
}
}
},
{ "insert": "\n" }
],
"complex_formatting": [
{ "attributes": { "bold": true }, "insert": "Name" },
{ "insert": ": Jon Snow\n" },
{ "attributes": { "bold": true, "italic": true }, "insert": "* not their real name" },
{ "insert": "\n" },
{ "insert": "first item" },
{ "attributes": { "list": { "depth": 0, "type": "ordered" } }, "insert": "\n" },
{ "insert": "second" },
{ "attributes": { "list": { "depth": 0, "type": "ordered" } }, "insert": "\n" },
{ "insert": "third and last" },
{ "attributes": { "list": { "depth": 0, "type": "ordered" } }, "insert": "\n" },
{ "insert": "That's all folks!\n" }
],
"code_block": [
{ "insert": "Here's a monospaced block:" },
{ "insert": "this line should be monospaced" },
{ "attributes": { "code-block": true }, "insert": "\n" },
{ "insert": "this one too, with extra spaces " },
{ "attributes": { "code-block": true }, "insert": "\n" },
{ "insert": "but not this one" }
]
}

View File

@ -0,0 +1,106 @@
# frozen_string_literal: true
require 'rails_helper'
require_relative '../../../script/import_scripts/vanilla_body_parser'
require_relative '../../../script/import_scripts/base/lookup_container'
require_relative '../../../script/import_scripts/base/uploader'
# Specs for VanillaBodyParser: non-rich bodies are only cleaned up, while
# 'Rich' bodies (JSON fragments) are converted to Discourse raw text.
describe VanillaBodyParser do
# Collaborators handed to VanillaBodyParser.configure in the before block.
let(:lookup) { ImportScripts::LookupContainer.new }
let(:uploader) { ImportScripts::Uploader.new }
let(:uploads_path) { 'spec/fixtures/images/vanilla_import' }
# Registered in the lookup under imported id '34567', the same id the quote
# fixture uses for its insertUser.
let(:user) { Fabricate(:user, id: '34567', email: 'saruman@maiar.org', name: 'Saruman, Multicolor', username: 'saruman_multicolor') }
let(:user_id) { lookup.add_user('34567', user) }
before do
# Silence anything written to stdout/stderr while the examples run.
STDOUT.stubs(:write)
STDERR.stubs(:write)
VanillaBodyParser.configure(lookup: lookup, uploader: uploader, host: 'vanilla.sampleforum.org', uploads_path: uploads_path)
end
# --- Non-rich ('Html') bodies ---
it 'keeps regular text intact' do
parsed = VanillaBodyParser.new({ 'Format' => 'Html', 'Body' => 'Hello everyone!' }, user_id).parse
expect(parsed).to eq 'Hello everyone!'
end
it 'keeps html tags' do
parsed = VanillaBodyParser.new({ 'Format' => 'Html', 'Body' => 'H<br>E<br>L<br>L<br>O' }, user_id).parse
expect(parsed).to eq "H<br>E<br>L<br>L<br>O"
end
# NOTE(review): the example name says the second input line starts with
# spaces; confirm the literal below still carries that leading whitespace,
# since the example depends on it.
it 'parses invalid html, removes font tags and leading spaces' do
complex_html = '''<b><font color=green>this was bold and green:</b></font color=green>
this starts with spaces but IS NOT a quote'''
parsed = VanillaBodyParser.new({ 'Format' => 'Html', 'Body' => complex_html }, user_id).parse
expect(parsed).to eq '''<b>this was bold and green:</b>
this starts with spaces but IS NOT a quote'''
end
# --- 'Rich' bodies: each example feeds one entry of the JSON fixture ---
describe 'rich format' do
# Fixture keyed by scenario name; every value is an array of fragments that
# is re-serialized with to_json to act as the row's Body.
let(:rich_bodies) { JSON.parse(File.read('spec/fixtures/json/vanilla-rich-posts.json')).deep_symbolize_keys }
it 'extracts text-only bodies' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:text].to_json }, user_id).parse
expect(parsed).to eq "This is a message.\n\nAnd a second line."
end
# No user is registered under imported id 666 here, so the Vanilla name is used.
it 'supports mentions of non-imported users' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:mention].to_json }, user_id).parse
expect(parsed).to eq "@Gandalf The Grey, what do you think?"
end
# Once id 666 is registered in the lookup, the Discourse username is used.
it 'supports mentions imported users' do
mentioned = Fabricate(:user, id: '666', email: 'gandalf@maiar.com', name: 'Gandalf The Grey', username: 'gandalf_the_grey')
lookup.add_user('666', mentioned)
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:mention].to_json }, user_id).parse
expect(parsed).to eq "@gandalf_the_grey, what do you think?"
end
it 'supports links' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:links].to_json }, user_id).parse
expect(parsed).to eq "We can link to the <a href=\"https:\/\/www.discourse.org\/\">Discourse home page</a> and it works."
end
# The quoted post is not in the lookup, so a bare [quote] is expected.
it 'supports quotes without topic info when it cannot be found' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:quote].to_json }, user_id).parse
expect(parsed).to eq "[quote]\n\nThis is the full<br \/>body<br \/>of the quoted discussion.<br \/>\n\n[/quote]\n\nWhen did this happen?"
end
# The quoted post is registered as 'discussion#12345', so the quote carries
# the username, post number and topic id.
it 'supports quotes with user and topic info' do
post = Fabricate(:post, user: user, id: 'discussion#12345', raw: "This is the full\r\nbody\r\nof the quoted discussion.\r\n")
topic_id = lookup.add_topic(post)
lookup.add_post('discussion#12345', post)
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:quote].to_json }, user_id).parse
expect(parsed).to eq "[quote=\"#{user.username}, post: #{post.post_number}, topic: #{post.topic.id}\"]\n\nThis is the full<br \/>body<br \/>of the quoted discussion.<br \/>\n\n[/quote]\n\nWhen did this happen?"
end
# Matched with a pattern because the dimensions and upload:// name are not
# fixed by the example.
it 'supports uploaded images' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:image].to_json }, user_id).parse
expect(parsed).to match(/Here's the screenshot\:\n\n\!\[Screen Shot 2020\-05\-26 at 7\.09\.06 AM\.png\|\d+x\d+\]\(upload\:\/\/\w+\.png\)$/)
end
# The embedded URL is on another host, so it stays a markdown link.
it 'supports embedded links' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:embed_link].to_json }, user_id).parse
expect(parsed).to eq "Does anyone know this website?\n\n[Title of the page being linked](https:\/\/someurl.com\/long\/path\/here_and_there\/?fdkmlgm)"
end
# The expectation is the original forum URL as a link rather than upload markup.
it 'keeps uploaded files as links' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:upload_file].to_json }, user_id).parse
expect(parsed).to eq "This is a PDF I've uploaded:\n\n[original_name_of_file.pdf](https:\/\/vanilla.sampleforum.org\/uploads\/393\/5QR3BX57K7HM.pdf)"
end
# Bold, bold+italic and an ordered list in a single body.
it 'supports complex formatting' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:complex_formatting].to_json }, user_id).parse
expect(parsed).to eq "<b>Name</b>: Jon Snow\n\n<b><i>* not their real name</i></b>\n\n<ol>\n\n<li>first item</li>\n\n<li>second</li>\n\n<li>third and last</li>\n\n</ol>\n\nThat's all folks!"
end
# Trailing spaces inside the fenced block must survive normalization.
it 'support code blocks' do
parsed = VanillaBodyParser.new({ 'Format' => 'Rich', 'Body' => rich_bodies[:code_block].to_json }, user_id).parse
expect(parsed).to eq "Here's a monospaced block:\n\n```this line should be monospaced\nthis one too, with extra spaces#{' ' * 4}\n```\n\nbut not this one"
end
end
end