Improvements to phpBB3 import script (#10999)

* FEATURE: Import attachments

* FEATURE: Add support for importing multiple forums in one

* FEATURE: Add support for category and tag mapping

* FEATURE: Import groups

* FIX: Add spaces around images

* FEATURE: Custom mapping of user rank to trust levels

* FIX: Do not fail import if it cannot import polls

* FIX: Optimize existing records lookup

Co-authored-by: Gerhard Schlager <mail@gerhard-schlager.at>
Co-authored-by: Jarek Radosz <jradosz@gmail.com>
This commit is contained in:
Bianca Nenciu 2021-01-14 21:44:43 +02:00 committed by GitHub
parent 82af278ae5
commit a71b219c9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 1606 additions and 79 deletions

View File

@ -606,10 +606,15 @@ class ImportScripts::Base
skipped += 1
puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}"
else
result = BookmarkManager.new(user).create(post_id: post.id)
begin
manager = BookmarkManager.new(user)
bookmark = manager.create(post_id: post.id)
created += 1 if result.errors.none?
skipped += 1 if result.errors.any?
created += 1 if manager.errors.none?
skipped += 1 if manager.errors.any?
rescue
skipped += 1
end
end
end

View File

@ -57,6 +57,11 @@ module ImportScripts
UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user)
end
# Returns the username of the Discourse user that was imported under the
# given source id.
#
# @param import_id the id the user had in the source system
# @return the username, or nil when no Discourse user id is known for +import_id+
def find_username_by_import_id(import_id)
  discourse_user_id = user_id_from_imported_user_id(import_id)
  return nil unless discourse_user_id.present?

  User.where(id: discourse_user_id).pluck(:username).first
end
# Get the Discourse Category id based on the id of the source category
def category_id_from_imported_category_id(import_id)
@categories[import_id] || @categories[import_id.to_s]

View File

@ -22,13 +22,13 @@ module ImportScripts::PhpBB3
if version.start_with?('3.0')
require_relative 'database_3_0'
Database_3_0.new(@database_client, @database_settings)
elsif version.start_with?('3.1')
elsif version.start_with?('3.1') || version.start_with?('3.2')
require_relative 'database_3_1'
Database_3_1.new(@database_client, @database_settings)
else
raise UnsupportedVersionError, <<~MSG
Unsupported version (#{version}) of phpBB detected.
Currently only 3.0.x and 3.1.x are supported by this importer.
Currently only version 3.0, 3.1 and 3.2 are supported by this importer.
MSG
end
end

View File

@ -53,6 +53,20 @@ module ImportScripts::PhpBB3
SQL
end
# Runs a query that selects group_id, group_type, group_name and group_desc
# for every row of the "<table prefix>groups" table and returns whatever
# +query+ returns for it.
def fetch_groups
query(<<-SQL)
SELECT g.group_id, g.group_type, g.group_name, g.group_desc
FROM #{@table_prefix}groups g
SQL
end
# Runs a query that selects group_id, user_id and group_leader for every row
# of the "<table prefix>user_group" table (one row per group membership) and
# returns whatever +query+ returns for it.
def fetch_group_users
query(<<-SQL)
SELECT ug.group_id, ug.user_id, ug.group_leader
FROM #{@table_prefix}user_group ug
SQL
end
def fetch_categories
query(<<-SQL)
SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_desc, x.first_post_time
@ -213,12 +227,20 @@ module ImportScripts::PhpBB3
SELECT b.user_id, t.topic_first_post_id
FROM #{@table_prefix}bookmarks b
JOIN #{@table_prefix}topics t ON (b.topic_id = t.topic_id)
WHERE b.user_id > #{last_user_id} AND b.topic_id > #{last_topic_id}
WHERE b.user_id > #{last_user_id}
ORDER BY b.user_id, b.topic_id
LIMIT #{@batch_size}
SQL
end
# Looks up a smiley by the text code users type for it.
#
# @param smiley_code [String] the smiley code, e.g. ":)"
# @return the first row returned by the query (columns +emotion+ and
#   +smiley_url+)
def get_smiley(smiley_code)
  # Smiley codes regularly contain quotes or backslashes (":'(", ":-\"), which
  # would terminate or corrupt the SQL string literal if embedded verbatim.
  # Quotes are doubled and backslashes are doubled.
  # NOTE(review): backslash doubling assumes MySQL's default string-literal
  # rules — confirm against the database client used by this importer.
  escaped_code = smiley_code.to_s.gsub(/[\\']/) { |char| char == "'" ? "''" : "\\\\" }

  query(<<-SQL).first
    SELECT emotion, smiley_url
    FROM #{@table_prefix}smilies
    WHERE code = '#{escaped_code}'
  SQL
end
def get_config_values
query(<<-SQL).first
SELECT

View File

@ -27,8 +27,13 @@ module ImportScripts::PhpBB3
def execute
puts '', "importing from phpBB #{@php_config[:phpbb_version]}"
SiteSetting.tagging_enabled = true if @settings.tag_mappings.present?
import_users
import_anonymous_users if @settings.import_anonymous_users
import_groups
import_user_groups
import_new_categories
import_categories
import_posts
import_private_messages if @settings.import_private_messages
@ -67,12 +72,12 @@ module ImportScripts::PhpBB3
batches do |offset|
rows, last_user_id = @database.fetch_users(last_user_id)
rows = rows.to_a.uniq { |row| row[:user_id] }
break if rows.size < 1
next if all_records_exist?(:users, importer.map_users_to_import_ids(rows))
create_users(rows, total: total_count, offset: offset) do |row|
begin
next if user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
importer.map_user(row)
rescue => e
log_error("Failed to map user with ID #{row[:user_id]}", e)
@ -91,10 +96,9 @@ module ImportScripts::PhpBB3
rows, last_username = @database.fetch_anonymous_users(last_username)
break if rows.size < 1
next if all_records_exist?(:users, importer.map_anonymous_users_to_import_ids(rows))
create_users(rows, total: total_count, offset: offset) do |row|
begin
next if user_id_from_imported_user_id(@settings.prefix(row[:post_username]))
importer.map_anonymous_user(row)
rescue => e
log_error("Failed to map anonymous user with ID #{row[:user_id]}", e)
@ -103,12 +107,74 @@ module ImportScripts::PhpBB3
end
end
# Creates one Discourse group per phpBB group row via +create_groups+.
#
# Rows with group_type 3 are skipped (presumably phpBB's "special" built-in
# groups — TODO confirm). The group name is prefixed with the configured
# site name when there is one, cut to 20 characters, and every character
# outside [a-zA-Z0-9-_. ] is replaced by "_". The untouched phpBB name is kept
# as the full name. If converting the description fails, the raw description
# is used. Any other mapping failure is passed to +log_error+.
def import_groups
  puts '', 'creating groups'

  create_groups(@database.fetch_groups) do |row|
    begin
      next if row[:group_type] == 3

      full_name = row[:group_name]
      raw_name = @settings.site_name.present? ? "#{@settings.site_name}_#{full_name}" : full_name
      group_name = raw_name[0..19].gsub(/[^a-zA-Z0-9\-_. ]/, '_')

      bio_raw =
        begin
          @importers.text_processor.process_raw_text(row[:group_desc])
        rescue StandardError
          row[:group_desc]
        end

      {
        id: @settings.prefix(row[:group_id]),
        name: group_name,
        full_name: full_name,
        bio_raw: bio_raw
      }
    rescue => e
      log_error("Failed to map group with ID #{row[:group_id]}", e)
    end
  end
end
# Adds imported users to the imported groups they belonged to in phpBB.
#
# Each membership row is resolved to a Discourse group id and user id through
# the lookup container. Rows whose group or user was not imported are skipped.
# Failures while creating a membership are passed to +log_error+ and do not
# stop the loop.
def import_user_groups
  puts '', 'creating user groups'

  @database.fetch_group_users.each do |row|
    group_id = @lookup.group_id_from_imported_group_id(@settings.prefix(row[:group_id]))
    next unless group_id

    # A membership for a user that was never imported cannot be created;
    # passing a nil user_id on would only produce an invalid record.
    user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
    next unless user_id

    begin
      GroupUser.find_or_create_by(user_id: user_id, group_id: group_id, owner: row[:group_leader])
    rescue => e
      log_error("Failed to add user #{row[:user_id]} to group #{row[:group_id]}", e)
    end
  end
end
# Creates the extra categories listed under +new_categories+ in the settings.
#
# Does nothing when no new categories are configured. Each entry is mapped to
# a category whose import id and parent lookup both use the site-name prefix.
def import_new_categories
  puts '', 'creating new categories'

  new_categories = @settings.new_categories
  # The setting is nil when the key is left blank in the settings file;
  # there is nothing to create in that case.
  return if new_categories.nil? || new_categories.empty?

  create_categories(new_categories) do |row|
    next if row == "SKIP"

    {
      id: @settings.prefix(row[:forum_id]),
      name: row[:name],
      parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id]))
    }
  end
end
# Creates Discourse categories for the phpBB forums.
#
# Forums whose id is mapped to 'SKIP' in +category_mappings+ are not created.
# All other rows are handed to the category importer.
def import_categories
  puts '', 'creating categories'

  rows = @database.fetch_categories
  importer = @importers.category_importer
  # The setting is nil when the key is left blank in the settings file.
  # NOTE(review): other readers of category_mappings need the same guard
  # unless Settings starts defaulting it to an empty hash.
  mappings = @settings.category_mappings || {}

  create_categories(rows) do |row|
    next if mappings[row[:forum_id].to_s] == 'SKIP'
    importer.map_category(row)
  end
end
@ -123,10 +189,9 @@ module ImportScripts::PhpBB3
rows, last_post_id = @database.fetch_posts(last_post_id)
break if rows.size < 1
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
create_posts(rows, total: total_count, offset: offset) do |row|
begin
next if post_id_from_imported_post_id(@settings.prefix(row[:post_id]))
importer.map_post(row)
rescue => e
log_error("Failed to map post with ID #{row[:post_id]}", e)
@ -145,10 +210,9 @@ module ImportScripts::PhpBB3
rows, last_msg_id = @database.fetch_messages(last_msg_id)
break if rows.size < 1
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
create_posts(rows, total: total_count, offset: offset) do |row|
begin
next if post_id_from_imported_post_id(@settings.prefix("pm:#{row[:msg_id]}"))
importer.map_message(row)
rescue => e
log_error("Failed to map message with ID #{row[:msg_id]}", e)
@ -168,7 +232,11 @@ module ImportScripts::PhpBB3
break if rows.size < 1
create_bookmarks(rows, total: total_count, offset: offset) do |row|
importer.map_bookmark(row)
begin
importer.map_bookmark(row)
rescue => e
log_error("Failed to map bookmark (#{row[:user_id]}, #{row[:topic_first_post_id]})", e)
end
end
end
end

View File

@ -2,10 +2,14 @@
module ImportScripts::PhpBB3
class BookmarkImporter
def initialize(settings)
@settings = settings
end
def map_bookmark(row)
{
user_id: row[:user_id],
post_id: row[:topic_first_post_id]
user_id: @settings.prefix(row[:user_id]),
post_id: @settings.prefix(row[:topic_first_post_id])
}
end
end

View File

@ -5,20 +5,28 @@ module ImportScripts::PhpBB3
# @param lookup [ImportScripts::LookupContainer]
# @param text_processor [ImportScripts::PhpBB3::TextProcessor]
# @param permalink_importer [ImportScripts::PhpBB3::PermalinkImporter]
def initialize(lookup, text_processor, permalink_importer)
# @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, text_processor, permalink_importer, settings)
@lookup = lookup
@text_processor = text_processor
@permalink_importer = permalink_importer
@settings = settings
end
def map_category(row)
return if @settings.category_mappings[row[:forum_id].to_s]
if row[:parent_id] && @settings.category_mappings[row[:parent_id].to_s]
puts "parent category (#{row[:parent_id]}) was mapped, but children was not (#{row[:forum_id]})"
end
{
id: row[:forum_id],
id: @settings.prefix(row[:forum_id]),
name: CGI.unescapeHTML(row[:forum_name]),
parent_category_id: @lookup.category_id_from_imported_category_id(row[:parent_id]),
parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
post_create_action: proc do |category|
update_category_description(category, row)
@permalink_importer.create_for_category(category, row[:forum_id])
@permalink_importer.create_for_category(category, row[:forum_id]) # skip @settings.prefix because ID is used in permalink generation
end
}
end
@ -43,7 +51,7 @@ module ImportScripts::PhpBB3
end
if row[:forum_desc].present?
changes = { raw: @text_processor.process_raw_text(row[:forum_desc]) }
changes = { raw: (@text_processor.process_raw_text(row[:forum_desc]) rescue row[:forum_desc]) }
opts = { revised_at: post.created_at, bypass_bump: true }
post.revise(Discourse.system_user, changes, opts)
end

View File

@ -32,7 +32,7 @@ module ImportScripts::PhpBB3
end
def category_importer
CategoryImporter.new(@lookup, text_processor, permalink_importer)
CategoryImporter.new(@lookup, text_processor, permalink_importer, @settings)
end
def post_importer
@ -44,15 +44,13 @@ module ImportScripts::PhpBB3
end
def bookmark_importer
BookmarkImporter.new
BookmarkImporter.new(@settings)
end
def permalink_importer
@permalink_importer ||= PermalinkImporter.new(@settings.permalinks)
end
protected
def attachment_importer
AttachmentImporter.new(@database, @uploader, @settings, @phpbb_config)
end
@ -62,15 +60,15 @@ module ImportScripts::PhpBB3
end
def poll_importer
PollImporter.new(@lookup, @database, text_processor)
PollImporter.new(@lookup, @database, text_processor, @settings)
end
def text_processor
@text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings)
@text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings, @phpbb_config)
end
def smiley_processor
SmileyProcessor.new(@uploader, @settings, @phpbb_config)
SmileyProcessor.new(@uploader, @database, @settings, @phpbb_config)
end
end
end

View File

@ -20,7 +20,7 @@ module ImportScripts::PhpBB3
end
def map_message(row)
user_id = @lookup.user_id_from_imported_user_id(row[:author_id]) || Discourse.system_user.id
user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) || Discourse.system_user.id
attachments = import_attachments(row, user_id)
mapped = {
@ -84,7 +84,7 @@ module ImportScripts::PhpBB3
import_user_ids = get_recipient_user_ids(row[:to_address])
import_user_ids.map! do |import_user_id|
@lookup.find_user_by_import_id(import_user_id).try(:username)
@lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username)
end.compact
end
@ -93,7 +93,7 @@ module ImportScripts::PhpBB3
end
def get_import_id(msg_id)
"pm:#{msg_id}"
@settings.prefix("pm:#{msg_id}")
end
# Creates a sorted array consisting of the message's author and recipients.

View File

@ -39,7 +39,7 @@ module ImportScripts::PhpBB3
end
def create_for_post(post, import_id)
return unless @settings.create_topic_links && post
return unless @settings.create_post_links && post
url = "viewtopic.php?p=#{import_id}"

View File

@ -5,10 +5,12 @@ module ImportScripts::PhpBB3
# @param lookup [ImportScripts::LookupContainer]
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
# @param text_processor [ImportScripts::PhpBB3::TextProcessor]
def initialize(lookup, database, text_processor)
# @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, database, text_processor, settings)
@lookup = lookup
@database = database
@text_processor = text_processor
@settings = settings
end
# @param poll_data [ImportScripts::PhpBB3::PollData]
@ -47,7 +49,7 @@ module ImportScripts::PhpBB3
end
def get_option_text(row)
text = @text_processor.process_raw_text(row[:poll_option_text])
text = @text_processor.process_raw_text(row[:poll_option_text]) rescue row[:poll_option_text]
text.squish!
text.gsub!(/^(\d+)\./, '\1\.')
text
@ -55,7 +57,7 @@ module ImportScripts::PhpBB3
# @param poll_data [ImportScripts::PhpBB3::PollData]
def get_poll_text(poll_data)
title = @text_processor.process_raw_text(poll_data.title)
title = @text_processor.process_raw_text(poll_data.title) rescue poll_data.title
text = +"#{title}\n\n"
arguments = ["results=always"]
@ -118,7 +120,7 @@ module ImportScripts::PhpBB3
rows.each do |row|
option_id = mapped_option_ids[row[:poll_option_id]]
user_id = @lookup.user_id_from_imported_user_id(row[:user_id])
user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
if option_id.present? && user_id.present?
PollVote.create!(poll: poll, poll_option_id: option_id, user_id: user_id)

View File

@ -18,22 +18,24 @@ module ImportScripts::PhpBB3
end
def map_to_import_ids(rows)
rows.map { |row| row[:post_id] }
rows.map { |row| @settings.prefix(row[:post_id]) }
end
def map_post(row)
imported_user_id = row[:post_username].blank? ? row[:poster_id] : row[:post_username]
return if @settings.category_mappings[row[:forum_id].to_s] == 'SKIP'
imported_user_id = @settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username])
user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || -1
is_first_post = row[:post_id] == row[:topic_first_post_id]
attachments = import_attachments(row, user_id)
mapped = {
id: row[:post_id],
id: @settings.prefix(row[:post_id]),
user_id: user_id,
created_at: Time.zone.at(row[:post_time]),
raw: @text_processor.process_post(row[:post_text], attachments),
import_topic_id: row[:topic_id]
import_topic_id: @settings.prefix(row[:topic_id])
}
if is_first_post
@ -54,14 +56,18 @@ module ImportScripts::PhpBB3
def map_first_post(row, mapped)
poll_data = add_poll(row, mapped) if @settings.import_polls
mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id])
mapped[:category] = @lookup.category_id_from_imported_category_id(@settings.prefix(@settings.category_mappings[row[:forum_id].to_s])) ||
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:forum_id]))
mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255]
mapped[:pinned_at] = mapped[:created_at] unless row[:topic_type] == Constants::POST_NORMAL
mapped[:pinned_globally] = row[:topic_type] == Constants::POST_GLOBAL
mapped[:views] = row[:topic_views]
mapped[:post_create_action] = proc do |post|
@permalink_importer.create_for_topic(post.topic, row[:topic_id])
@permalink_importer.create_for_post(post, row[:post_id])
if tags = @settings.tag_mappings[row[:forum_id].to_s].presence
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tags)
end
@permalink_importer.create_for_topic(post.topic, row[:topic_id]) # skip @settings.prefix because ID is used in permalink generation
@permalink_importer.create_for_post(post, row[:post_id]) # skip @settings.prefix because ID is used in permalink generation
@poll_importer.update_poll(row[:topic_id], post, poll_data) if poll_data
TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true)
end
@ -70,16 +76,16 @@ module ImportScripts::PhpBB3
end
def map_other_post(row, mapped)
parent = @lookup.topic_lookup_from_imported_post_id(row[:topic_first_post_id])
parent = @lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id]))
if parent.blank?
puts "Parent post #{row[:topic_first_post_id]} doesn't exist. Skipping #{row[:post_id]}: #{row[:topic_title][0..40]}"
puts "Parent post #{@settings.prefix(row[:topic_first_post_id])} doesn't exist. Skipping #{@settings.prefix(row[:post_id])}: #{row[:topic_title][0..40]}"
return nil
end
mapped[:topic_id] = parent[:topic_id]
mapped[:post_create_action] = proc do |post|
@permalink_importer.create_for_post(post, row[:post_id])
@permalink_importer.create_for_post(post, row[:post_id]) # skip @settings.prefix because ID is used in permalink generation
TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true)
end
@ -91,9 +97,14 @@ module ImportScripts::PhpBB3
poll_data = PollData.new(row[:poll_title], row[:poll_max_options], row[:poll_end])
poll_raw = @poll_importer.create_raw(row[:topic_id], poll_data)
return if poll_data.options.size < 2
mapped_post[:raw] = poll_raw << "\n\n" << mapped_post[:raw]
poll_data
end
# Returns a Guardian for the system user, built on first use and reused
# afterwards.
def staff_guardian
  return @_staff_guardian if @_staff_guardian

  @_staff_guardian = Guardian.new(Discourse.system_user)
end
end
end

View File

@ -12,14 +12,18 @@ module ImportScripts::PhpBB3
end
def map_users_to_import_ids(rows)
rows.map { |row| row[:user_id] }
rows.map { |row| @settings.prefix(row[:user_id]) }
end
def map_user(row)
is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER
trust_level = row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1]
trust_level = @settings.trust_level_for_posts(row[:user_posts], trust_level: trust_level)
manual_locked_trust_level = trust_level > TrustLevel[1] ? trust_level : nil
{
id: row[:user_id],
id: @settings.prefix(row[:user_id]),
email: row[:user_email],
username: row[:username],
password: @settings.import_passwords ? row[:user_password] : nil,
@ -28,7 +32,8 @@ module ImportScripts::PhpBB3
last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]),
registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil),
active: is_active_user,
trust_level: row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1],
trust_level: trust_level,
manual_locked_trust_level: manual_locked_trust_level,
approved: is_active_user,
approved_by_id: is_active_user ? Discourse.system_user.id : nil,
approved_at: is_active_user ? Time.now : nil,
@ -45,14 +50,14 @@ module ImportScripts::PhpBB3
end
def map_anonymous_users_to_import_ids(rows)
rows.map { |row| row[:post_username] }
rows.map { |row| @settings.prefix(row[:post_username]) }
end
def map_anonymous_user(row)
username = row[:post_username]
{
id: username,
id: @settings.prefix(username),
email: "anonymous_#{SecureRandom.hex}@no-email.invalid",
username: username,
name: @settings.username_as_name ? username : '',

View File

@ -11,6 +11,73 @@ database:
batch_size: 1000 # Don't change this unless you know what you're doing. The default (1000) should work just fine.
import:
# Set this if you import multiple phpBB forums into a single Discourse forum.
#
# For example, when importing multiple sites, prefix all imported IDs
# with 'first' to avoid conflicts. Subsequent import runs must have a
# different 'site_name'.
#
# site_name: first
#
site_name:
# Create new categories
#
# For example, to create a parent category and a subcategory.
#
# new_categories:
# - forum_id: foo
# name: Foo Category
# - forum_id: bar
# name: Bar Category
# parent_id: foo
#
new_categories:
# Category mappings
#
# For example, topics from phpBB category 1 and 2 will be imported
# in the new "Foo Category" category, topics from phpBB category 3
# will be imported in subcategory "Bar category", topics from phpBB
# category 4 will be merged into category 5 and category 6 will be
# skipped.
#
# category_mappings:
# 1: foo
# 2: foo
# 3: bar
# 4: 5
# 6: SKIP
#
category_mappings:
# Tag mappings
#
# For example, imported topics from phpBB category 1 will be tagged
# with 'first-category', etc.
#
# tag_mappings:
# 1:
# - first-category
# 2:
# - second-category
# 3:
# - third-category
#
tag_mappings:
# Rank to trust level mapping
#
# Map the number of posts of a phpBB 3.x user to a trust level.
# With the example below, users with at least 3000 posts will have TL3, etc.
#
# rank_mapping:
# trust_level_1: 200
# trust_level_2: 1000
# trust_level_3: 3000
#
rank_mapping:
# WARNING: Do not activate this option unless you know what you are doing.
# It will probably break the BBCode to Markdown conversion and slow down your import.
use_bbcode_to_md: false

View File

@ -0,0 +1,88 @@
# frozen_string_literal: true
module ImportScripts; end
module ImportScripts::PhpBB3; end

module ImportScripts::PhpBB3::BBCode
  # Linebreak types a node can request before or after its own output.
  LINEBREAK_AUTO = :auto
  LINEBREAK_HARD = :hard
  LINEBREAK_HTML = :html

  # A node of the intermediate tree that is built from the XML document and
  # later rendered to Markdown. Creating a node with a parent appends it to
  # the parent's children and links it with its previous sibling.
  class MarkdownNode
    # @return [String] name of the XML node this node was created for
    attr_reader :xml_node_name

    # @return [MarkdownNode, nil] nil for the root node
    attr_reader :parent

    # @return [Array<MarkdownNode>, nil] nil once {#skip_children} was called
    attr_reader :children

    # @return [MarkdownNode, nil]
    attr_accessor :previous_sibling

    # @return [MarkdownNode, nil]
    attr_accessor :next_sibling

    # @return [String]
    attr_accessor :text

    # @return [String]
    attr_accessor :prefix

    # @return [String]
    attr_accessor :postfix

    # @return [Integer]
    attr_accessor :prefix_linebreaks

    # @return [Integer]
    attr_accessor :postfix_linebreaks

    # @return [Symbol] one of the LINEBREAK_* constants
    attr_accessor :prefix_linebreak_type

    # @return [Symbol] one of the LINEBREAK_* constants
    attr_accessor :postfix_linebreak_type

    # @return [String, nil] not set by the constructor
    attr_accessor :prefix_children

    # @param xml_node_name [String]
    # @param parent [MarkdownNode, nil]
    def initialize(xml_node_name:, parent:)
      @xml_node_name = xml_node_name

      @text = +""
      @prefix = +""
      @postfix = +""

      @prefix_linebreaks = 0
      @postfix_linebreaks = 0

      @prefix_linebreak_type = LINEBREAK_AUTO
      @postfix_linebreak_type = LINEBREAK_AUTO

      @parent = parent
      @children = []

      if @parent
        @previous_sibling = @parent.children.last
        @previous_sibling.next_sibling = self if @previous_sibling
        @parent.children << self
      end
    end

    # Uses the same text as prefix and postfix.
    #
    # @param text [String]
    def enclosed_with=(text)
      @prefix = @postfix = text
    end

    # Marks the node as a leaf by replacing the children list with nil.
    def skip_children
      @children = nil
    end

    # @return [String] short summary of the node for debugging
    def to_s
      # children is nil after skip_children; report zero instead of raising.
      child_count = children ? children.size : 0
      "name: #{xml_node_name}, prefix: #{prefix}, text: #{text}, children: #{child_count}, postfix: #{postfix}"
    end
  end
end

View File

@ -0,0 +1,356 @@
# frozen_string_literal: true
require 'nokogiri'
require_relative 'markdown_node'
module ImportScripts::PhpBB3::BBCode
class XmlToMarkdown
# @param xml [String] the XML text to convert; parsed with Nokogiri
# @param opts [Hash] optional collaborators and flags:
#   :username_from_user_id    - called with an Integer user id
#   :smilie_to_emoji          - called with the content of an E element
#   :quoted_post_from_post_id - called with an Integer post id
#   :upload_md_from_file      - called with a filename and an index
#   :url_replacement          - called with a URL
#   :allow_inline_code        - defaults to false
#   :traditional_linebreaks   - defaults to false
def initialize(xml, opts = {})
  @username_from_user_id, @smilie_to_emoji, @quoted_post_from_post_id, @upload_md_from_file, @url_replacement =
    opts.values_at(
      :username_from_user_id,
      :smilie_to_emoji,
      :quoted_post_from_post_id,
      :upload_md_from_file,
      :url_replacement
    )

  @allow_inline_code = opts.fetch(:allow_inline_code, false)
  @traditional_linebreaks = opts.fetch(:traditional_linebreaks, false)

  @doc = Nokogiri::XML(xml)
  @list_stack = []
end
# Converts the parsed document to Markdown.
#
# Cleans up the XML first, then builds the MarkdownNode tree below a "ROOT"
# node and renders that tree.
#
# @return [String] the Markdown without trailing whitespace
def convert
  preprocess_xml

  root_node = MarkdownNode.new(xml_node_name: "ROOT", parent: nil)
  visit(@doc.root, root_node)

  markdown = to_markdown(root_node)
  markdown.rstrip
end
private
# Names of XML elements that are removed from the document before conversion.
IGNORED_ELEMENTS = %w[s e i].freeze
# A text node that is the first child of one of these elements loses its
# leading whitespace.
ELEMENTS_WITHOUT_LEADING_WHITESPACES = %w[LIST LI].freeze
# Repeated <br> inside these elements is rendered with hard linebreaks.
ELEMENTS_WITH_HARD_LINEBREAKS = %w[B I U].freeze
# Linebreak count above which explicit (non-plain) linebreaks are emitted.
EXPLICIT_LINEBREAK_THRESHOLD = 2
# Cleans up the parsed document in place before it is converted.
#
# Text nodes lose a leading run of newlines (plus the whitespace that follows
# it), additionally lose all leading whitespace when
# remove_leading_whitespaces? says so, and are removed when nothing is left.
# Elements whose name is listed in IGNORED_ELEMENTS are removed.
def preprocess_xml
@doc.traverse do |node|
if node.is_a? Nokogiri::XML::Text
# strip newlines at the very start of the text and any whitespace after them
node.content = node.content.gsub(/\A\n+\s*/, "")
node.content = node.content.lstrip if remove_leading_whitespaces?(node)
# a text node without content would only produce an empty Markdown node
node.remove if node.content.empty?
elsif IGNORED_ELEMENTS.include?(node.name)
node.remove
end
end
end
# Tells whether leading whitespace of the given node should be dropped: true
# only when the node is the first child of an element listed in
# ELEMENTS_WITHOUT_LEADING_WHITESPACES. Nodes without a parent yield false.
def remove_leading_whitespaces?(xml_node)
  container = xml_node.parent

  if container
    ELEMENTS_WITHOUT_LEADING_WHITESPACES.include?(container.name) &&
      container.children.first == xml_node
  else
    false
  end
end
# Walks the XML tree depth-first and builds the Markdown node tree.
#
# If a (private) method "visit_<element name>" exists and the Markdown parent
# still accepts children, a Markdown node is created and handed to that
# method. The XML children are then visited with the new node as parent, or
# with the current parent when no node was created. Finally an optional
# "after_<element name>" hook is called; it receives nil as Markdown node
# when none was created.
#
# @param xml_node the XML node to visit
# @param md_parent [MarkdownNode]
def visit(xml_node, md_parent)
  visitor = "visit_#{xml_node.name}"
  # respond_to? takes the "include private methods" flag as a plain positional
  # boolean; the visitors are private, so it has to be true.
  visitor_exists = respond_to?(visitor, true)

  # children is nil once skip_children was called on the parent
  if visitor_exists && md_parent.children
    md_node = create_node(xml_node, md_parent)
    send(visitor, xml_node, md_node)
  end

  xml_node.children.each { |xml_child| visit(xml_child, md_node || md_parent) }

  after_hook = "after_#{xml_node.name}"
  send(after_hook, xml_node, md_node) if respond_to?(after_hook, true)
end
# Creates the Markdown node for an XML node below the given parent.
#
# Consecutive "br" elements share one node: when the parent's last child was
# already created for a "br", that node is returned instead of a new one.
def create_node(xml_node, md_parent)
  node_name = xml_node.name

  reusable = md_parent.children.last if node_name == "br"
  return reusable if reusable&.xml_node_name == "br"

  MarkdownNode.new(xml_node_name: node_name, parent: md_parent)
end
# Appends the unescaped content of a text node to the Markdown node's text.
def visit_text(xml_node, md_node)
  md_node.text.concat(text(xml_node))
end
# Bold: wraps the content in "**" unless the element is directly nested in
# another B element.
def visit_B(xml_node, md_node)
  return if xml_node.parent&.name == 'B'

  md_node.enclosed_with = "**"
end

# Italic: wraps the content in "_" unless the element is directly nested in
# another I element.
def visit_I(xml_node, md_node)
  return if xml_node.parent&.name == 'I'

  md_node.enclosed_with = "_"
end

# Underline: wraps the content in [u]...[/u] unless the element is directly
# nested in another U element.
def visit_U(xml_node, md_node)
  return if xml_node.parent&.name == 'U'

  md_node.prefix = "[u]"
  md_node.postfix = "[/u]"
end
# Code: renders the element's content (without trailing whitespace) as a
# single leaf node. Inline backticks are used only when inline code is
# allowed and the content has no newline; otherwise a ```text fence is used.
# Two linebreaks are requested on both sides.
def visit_CODE(xml_node, md_node)
  code = xml_node.content
  inline = @allow_inline_code && !code.include?("\n")

  if inline
    md_node.enclosed_with = "`"
  else
    md_node.prefix = "```text\n"
    md_node.postfix = "\n```"
  end

  md_node.text = code.rstrip
  md_node.skip_children

  md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
  md_node.prefix_linebreak_type = LINEBREAK_HTML
end
# List: requests two linebreaks around a top-level list and one around a
# nested list, then pushes a new entry onto the list stack. A list without a
# "type" attribute is treated as unordered.
def visit_LIST(xml_node, md_node)
  top_level = @list_stack.size == 0

  md_node.prefix_linebreaks = md_node.postfix_linebreaks = top_level ? 2 : 1
  md_node.prefix_linebreak_type = LINEBREAK_HTML if top_level

  @list_stack.push(unordered: xml_node.attribute('type').nil?, item_count: 0)
end

# Leaves the list that was entered by visit_LIST.
def after_LIST(xml_node, md_node)
  @list_stack.pop
end
# List item: counts the item in the innermost list and sets the bullet as
# prefix — "*" for unordered lists, "<n>." for ordered ones — indented by two
# spaces per nesting level. One linebreak is requested after the item.
def visit_LI(xml_node, md_node)
  current_list = @list_stack.last
  nesting = @list_stack.size - 1
  current_list[:item_count] += 1

  bullet = current_list[:unordered] ? '*' : "#{current_list[:item_count]}."

  md_node.prefix = "#{' ' * 2 * nesting}#{bullet} "
  md_node.postfix_linebreaks = 1
end
# Image: renders a Markdown image without alt text from the "src" attribute,
# requests two linebreaks on both sides and ignores the element's children.
def visit_IMG(xml_node, md_node)
  source = xml_node.attribute('src')

  md_node.text = +"![](#{source})"
  md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
  md_node.skip_children
end
# Link: the "url" attribute is HTML-unescaped and, when a replacement
# callable is configured, passed through it. When the link text equals the
# raw attribute value, only the URL is emitted; otherwise the content is
# wrapped as a Markdown link.
def visit_URL(xml_node, md_node)
  raw_url = xml_node.attribute('url').to_s
  target = CGI.unescapeHTML(raw_url)
  target = @url_replacement.call(target) if @url_replacement

  if xml_node.content.strip != raw_url
    md_node.prefix = "["
    md_node.postfix = "](#{target})"
  else
    md_node.text = target
    md_node.skip_children
  end
end
# Email: wraps the element's content in angle brackets ("<" ... ">").
def visit_EMAIL(xml_node, md_node)
md_node.prefix = "<"
md_node.postfix = ">"
end
# Linebreak: increases the number of linebreaks requested after the node.
# From the second linebreak on, hard linebreaks are requested when the <br>
# sits directly inside an element listed in ELEMENTS_WITH_HARD_LINEBREAKS.
def visit_br(xml_node, md_node)
  md_node.postfix_linebreaks += 1

  return unless md_node.postfix_linebreaks > 1
  return unless ELEMENTS_WITH_HARD_LINEBREAKS.include?(xml_node.parent&.name)

  md_node.postfix_linebreak_type = LINEBREAK_HARD
end
# Smilie: when a smilie converter is configured, its result for the element's
# content becomes the node text and the children are ignored. Without a
# converter the node is left untouched.
def visit_E(xml_node, md_node)
  return unless @smilie_to_emoji

  md_node.text = @smilie_to_emoji.call(xml_node.content)
  md_node.skip_children
end
# Quote: emits a [quote] block that references the quoted post when it can
# be resolved, a [quote] block with only the username when just the author
# is known, and otherwise prefixes the children with "> ". Two linebreaks
# are requested on both sides.
def visit_QUOTE(xml_node, md_node)
  quote_header =
    if (post = quoted_post(xml_node))
      %Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n}
    elsif (username = quoted_username(xml_node))
      %Q{[quote="#{username}"]\n}
    end

  if quote_header
    md_node.prefix = quote_header
    md_node.postfix = "\n[/quote]"
  else
    md_node.prefix_children = "> "
  end

  md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
  md_node.prefix_linebreak_type = LINEBREAK_HTML
end
def quoted_post(xml_node)
if @quoted_post_from_post_id
post_id = to_i(xml_node.attr("post_id"))
@quoted_post_from_post_id.call(post_id) if post_id
end
end
def quoted_username(xml_node)
if @username_from_user_id
user_id = to_i(xml_node.attr("user_id"))
username = @username_from_user_id.call(user_id) if user_id
end
username = xml_node.attr("author") unless username
username
end
# Converts a string to an Integer, but only when it consists solely of
# digits.
#
# @return [Integer, nil] nil for nil and for anything that is not all digits
def to_i(string)
  return unless string&.match?(/\A\d+\z/)

  string.to_i
end
def visit_ATTACHMENT(xml_node, md_node)
filename = xml_node.attr("filename")
index = to_i(xml_node.attr("index"))
md_node.text = @upload_md_from_file.call(filename, index) if @upload_md_from_file
md_node.prefix_linebreaks = md_node.postfix_linebreaks = 1
md_node.skip_children
end
# Font size: sizes 1-99 are wrapped in <small>, sizes 101-200 in <big>.
# Any other value (including 100 and non-numeric sizes) leaves the node
# untouched.
def visit_SIZE(xml_node, md_node)
  case to_i(xml_node.attr("size"))
  when 1..99
    md_node.prefix = '<small>'
    md_node.postfix = '</small>'
  when 101..200
    md_node.prefix = '<big>'
    md_node.postfix = '</big>'
  end
end
# Returns the HTML-unescaped text of an XML node.
#
# The escape_markdown flag is accepted but currently has no effect; Markdown
# special characters are not escaped.
def text(xml_node, escape_markdown: true)
  CGI.unescapeHTML(xml_node.text)
end
# Renders the children of the given node to Markdown, recursing into child
# nodes that have children of their own.
#
# @param md_parent [MarkdownNode]
# @return [String]
def to_markdown(md_parent)
markdown = +""
md_parent.children.each do |md_node|
prefix = md_node.prefix
# a node with children is rendered from them; otherwise its own text is used
text = md_node.children&.any? ? to_markdown(md_node) : md_node.text
postfix = md_node.postfix
parent_prefix = prefix_from_parent(md_parent)
# the inherited prefix is put in front of the node's own prefix, except for
# "br" nodes and except for the first output of a parent that does not
# define prefix_children itself
if parent_prefix && md_node.xml_node_name != "br" && (md_parent.prefix_children || !markdown.empty?)
prefix = "#{parent_prefix}#{prefix}"
end
# whitespace inside CODE is left exactly as it is
if md_node.xml_node_name != "CODE"
text, prefix, postfix = hoist_whitespaces!(markdown, text, prefix, postfix)
end
add_linebreaks!(markdown, md_node.prefix_linebreaks, md_node.prefix_linebreak_type, parent_prefix)
markdown << prefix
markdown << text
markdown << postfix
add_linebreaks!(markdown, md_node.postfix_linebreaks, md_node.postfix_linebreak_type, parent_prefix)
end
markdown
end
# Moves whitespace at the edges of the text outside of the prefix and postfix
# that will surround it.
#
# None of the arguments are modified; the adjusted values are returned.
#
# @param markdown [String] the Markdown generated so far
# @param text [String]
# @param prefix [String]
# @param postfix [String]
# @return [Array<String>] the adjusted text, prefix and postfix
def hoist_whitespaces!(markdown, text, prefix, postfix)
# leading whitespace is dropped when the output already ends with a newline
text = text.lstrip if markdown.end_with?("\n")
unless prefix.empty?
# keep one leading whitespace character, but in front of the prefix, unless
# the output already ends with whitespace
if starts_with_whitespace?(text) && !ends_with_whitespace?(markdown)
prefix = "#{text[0]}#{prefix}"
end
text = text.lstrip
end
unless postfix.empty?
# keep one trailing whitespace character, but after the postfix
if ends_with_whitespace?(text)
postfix = "#{postfix}#{text[-1]}"
end
text = text.rstrip
end
[text, prefix, postfix]
end
# Finds the prefix that applies to children by walking from the given node
# up through its ancestors and returning the first prefix_children that is
# set.
#
# @return [String, nil] nil when neither the node nor any ancestor has one
def prefix_from_parent(md_parent)
  current = md_parent

  loop do
    return nil unless current

    inherited = current.prefix_children
    return inherited if inherited

    current = current.parent
  end
end
# Makes sure the Markdown generated so far ends with the required number of
# linebreaks, modifying +markdown+ in place.
#
# Nothing happens when no linebreaks are required, when the output is still
# empty, or when it already ends with enough linebreaks.
#
# @param markdown [String] the Markdown generated so far
# @param required_linebreak_count [Integer]
# @param linebreak_type [Symbol] one of the LINEBREAK_* constants
# @param prefix [String, nil] text inserted before every linebreak except the first
def add_linebreaks!(markdown, required_linebreak_count, linebreak_type, prefix = nil)
return if required_linebreak_count == 0 || markdown.empty?
# counts plain ("\n"), hard ("\\\n") and HTML ("<br>\n") linebreaks at the end
existing_linebreak_count = markdown[/(?:\\?\n|<br>\n)*\z/].count("\n")
if linebreak_type == LINEBREAK_HTML
# one more than the larger of (existing, required - 1); only used when it
# exceeds the threshold for explicit linebreaks
max_linebreak_count = [existing_linebreak_count, required_linebreak_count - 1].max + 1
required_linebreak_count = max_linebreak_count if max_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD
end
return if existing_linebreak_count >= required_linebreak_count
# existing trailing linebreaks are removed and all required ones are rewritten
rstrip!(markdown)
alternative_linebreak_start_index = required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2
required_linebreak_count.times do |index|
linebreak = linebreak(linebreak_type, index, alternative_linebreak_start_index, required_linebreak_count)
# before a plain linebreak the prefix goes in without its trailing whitespace
markdown << (linebreak == "\n" ? prefix.rstrip : prefix) if prefix && index > 0
markdown << linebreak
end
end
# Removes trailing whitespace and trailing plain, hard ("\\\n") and HTML
# ("<br>\n") linebreaks from the given string in place.
def rstrip!(markdown)
  # The pattern is anchored at the end of the string, so one substitution
  # covers everything there is to remove.
  markdown.sub!(/\s*(?:\\?\n|<br>\n)*\z/, '')
end
# Picks the text for one linebreak in a run of linebreaks.
#
# "<br>\n" is used for the last linebreak of an HTML-type run once the
# alternative start index is reached. "\\\n" is used for hard linebreaks,
# when traditional linebreaks are enabled, or from the alternative start
# index on. Everything else is a plain "\n".
#
# @param linebreak_type [Symbol] one of the LINEBREAK_* constants
# @param linebreak_index [Integer] zero-based position within the run
# @param alternative_linebreak_start_index [Integer]
# @param required_linebreak_count [Integer] length of the run
# @return [String]
def linebreak(linebreak_type, linebreak_index, alternative_linebreak_start_index, required_linebreak_count)
  alternative = linebreak_index >= alternative_linebreak_start_index
  last = linebreak_index + 1 == required_linebreak_count

  if linebreak_type == LINEBREAK_HTML && alternative && last
    "<br>\n"
  elsif linebreak_type == LINEBREAK_HARD || @traditional_linebreaks || alternative
    "\\\n"
  else
    "\n"
  end
end
# @return [Boolean] whether the first character of the text is whitespace
def starts_with_whitespace?(text)
  text.start_with?(/\s/)
end

# @return [Boolean] whether the last character of the text is whitespace
def ends_with_whitespace?(text)
  !text[/\s\z/].nil?
end
end
end

View File

@ -1,14 +1,23 @@
# frozen_string_literal: true
require 'csv'
require 'yaml'
require_relative '../../base'
module ImportScripts::PhpBB3
class Settings
def self.load(filename)
yaml = YAML::load_file(filename)
Settings.new(yaml)
Settings.new(yaml.deep_stringify_keys.with_indifferent_access)
end
attr_reader :site_name
attr_reader :new_categories
attr_reader :category_mappings
attr_reader :tag_mappings
attr_reader :rank_mapping
attr_reader :import_anonymous_users
attr_reader :import_attachments
attr_reader :import_private_messages
@ -34,6 +43,14 @@ module ImportScripts::PhpBB3
def initialize(yaml)
import_settings = yaml['import']
@site_name = import_settings['site_name']
@new_categories = import_settings['new_categories']
@category_mappings = import_settings['category_mappings']
@tag_mappings = import_settings['tag_mappings']
@rank_mapping = import_settings['rank_mapping']
@import_anonymous_users = import_settings['anonymous_users']
@import_attachments = import_settings['attachments']
@import_private_messages = import_settings['private_messages']
@ -58,6 +75,20 @@ module ImportScripts::PhpBB3
@database = DatabaseSettings.new(yaml['database'])
end
# Namespaces +val+ with the configured site name as "site_name:val".
#
# @param val [Object] value to prefix
# @return [Object] the prefixed String, or +val+ itself when either the
#   site name or +val+ is blank
def prefix(val)
  return val unless @site_name.present? && val.present?

  "#{@site_name}:#{val}"
end
# Determines the trust level for +rank+ from the configured rank mapping.
#
# Each mapping entry has a key of the form "trust_level_N" and a threshold
# value; the highest N whose threshold is <= +rank+ is used.
#
# @param rank [Integer] value compared against the mapping thresholds
# @param trust_level [Integer] lower bound for the result
# @return [Integer] the highest matching level, never below +trust_level+
def trust_level_for_posts(rank, trust_level: 0)
  return trust_level unless @rank_mapping.present?

  @rank_mapping.each do |name, threshold|
    next unless rank >= threshold

    level = name.gsub('trust_level_', '').to_i
    trust_level = level if level > trust_level
  end

  trust_level
end
end
class DatabaseSettings

View File

@ -3,10 +3,12 @@
module ImportScripts::PhpBB3
class SmileyProcessor
# @param uploader [ImportScripts::Uploader]
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
# @param settings [ImportScripts::PhpBB3::Settings]
# @param phpbb_config [Hash]
def initialize(uploader, settings, phpbb_config)
def initialize(uploader, database, settings, phpbb_config)
@uploader = uploader
@database = database
@smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])
@smiley_map = {}
@ -16,12 +18,16 @@ module ImportScripts::PhpBB3
# Replaces every phpBB smiley in +text+ with the result of #emoji, in place.
#
# The closing comment is matched with `s(?:\S+)` so that smiley codes which
# do not start with a colon (for example `8)` or `;)`) are replaced as well.
#
# @param text [String] mutable post text
# @return [String, nil] whatever String#gsub! returns (nil when nothing matched)
def replace_smilies(text)
  # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/.+?" alt=".*?" title=".*?" \/><!-- s(?:\S+) -->/) do
    emoji($1)
  end
end
# Resolves a phpBB smiley code to its replacement text.
#
# Codes already present in @smiley_map are returned directly. Otherwise the
# smiley is looked up in the database and uploaded; when there is no such
# smiley or the upload yields nothing, the textual fallback is used.
#
# @param smiley_code [String] smiley code as written in the post, e.g. ":)"
# @return [Object] the mapped value, the upload result or the text fallback
def emoji(smiley_code)
  @smiley_map.fetch(smiley_code) do
    row = @database.get_smiley(smiley_code)
    uploaded = row && upload_smiley(smiley_code, row[:smiley_url], smiley_code, row[:emotion])
    uploaded || smiley_as_text(smiley_code)
  end
end
@ -36,7 +42,7 @@ module ImportScripts::PhpBB3
[':o', ':-o', ':eek:'] => ':astonished:',
[':shock:'] => ':open_mouth:',
[':?', ':-?', ':???:'] => ':confused:',
['8-)', ':cool:'] => ':sunglasses:',
['8)', '8-)', ':cool:'] => ':sunglasses:',
[':lol:'] => ':laughing:',
[':x', ':-x', ':mad:'] => ':angry:',
[':P', ':-P', ':razz:'] => ':stuck_out_tongue:',

View File

@ -1,48 +1,75 @@
# frozen_string_literal: true
require_relative 'bbcode/xml_to_markdown'
module ImportScripts::PhpBB3
class TextProcessor
# @param lookup [ImportScripts::LookupContainer]
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
# @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
# @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, database, smiley_processor, settings)
# @param phpbb_config [Hash]
def initialize(lookup, database, smiley_processor, settings, phpbb_config)
@lookup = lookup
@database = database
@smiley_processor = smiley_processor
@he = HTMLEntities.new
@use_xml_to_markdown = phpbb_config[:phpbb_version].start_with?('3.2')
@settings = settings
@new_site_prefix = settings.new_site_prefix
create_internal_link_regexps(settings.original_site_prefix)
end
def process_raw_text(raw)
text = raw.dup
text = CGI.unescapeHTML(text)
def process_raw_text(raw, attachments = nil)
if @use_xml_to_markdown
unreferenced_attachments = attachments&.dup
clean_bbcodes(text)
if @settings.use_bbcode_to_md
text = bbcode_to_md(text)
converter = BBCode::XmlToMarkdown.new(
raw,
username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) },
smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup },
quoted_post_from_post_id: lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) },
upload_md_from_file: (lambda do |filename, index|
unreferenced_attachments[index] = nil
attachments.fetch(index, filename).dup
end if attachments),
url_replacement: nil,
allow_inline_code: false
)
text = converter.convert
text.gsub!(@short_internal_link_regexp) do |link|
replace_internal_link(link, $1, $2)
end
add_unreferenced_attachments(text, unreferenced_attachments)
else
text = raw.dup
text = CGI.unescapeHTML(text)
clean_bbcodes(text)
if @settings.use_bbcode_to_md
text = bbcode_to_md(text)
end
process_smilies(text)
process_links(text)
process_lists(text)
process_code(text)
fix_markdown(text)
process_attachments(text, attachments) if attachments.present?
text
end
process_smilies(text)
process_links(text)
process_lists(text)
process_code(text)
fix_markdown(text)
text
end
# Converts the raw text of a post into Markdown.
#
# The conversion runs exactly once and is best-effort: when it raises, a
# warning is printed and the unprocessed text is returned, so that a single
# broken post does not abort the import.
#
# @param raw [String] post text as stored by phpBB
# @param attachments [Array, nil] presumably the upload Markdown for the
#   post's attachments, indexed like phpBB's attachment list - TODO confirm
# @return [String] the converted text, or +raw+ when the conversion failed
def process_post(raw, attachments)
  process_raw_text(raw, attachments)
rescue StandardError => e
  warn "Failed to convert post text, keeping it unprocessed: #{e.class}: #{e.message}"
  raw
end
# Converts the raw text of a private message into Markdown.
#
# The conversion runs exactly once and is best-effort: when it raises, a
# warning is printed and the unprocessed text is returned, so that a single
# broken message does not abort the import.
#
# @param raw [String] message text as stored by phpBB
# @param attachments [Array, nil] presumably the upload Markdown for the
#   message's attachments, indexed like phpBB's attachment list - TODO confirm
# @return [String] the converted text, or +raw+ when the conversion failed
def process_private_msg(raw, attachments)
  process_raw_text(raw, attachments)
rescue StandardError => e
  warn "Failed to convert private message text, keeping it unprocessed: #{e.class}: #{e.message}"
  raw
end
protected
@ -139,6 +166,12 @@ module ImportScripts::PhpBB3
attachments.fetch(index, real_filename)
end
add_unreferenced_attachments(text, unreferenced_attachments)
end
def add_unreferenced_attachments(text, unreferenced_attachments)
return text unless unreferenced_attachments
unreferenced_attachments = unreferenced_attachments.compact
text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
text
@ -161,6 +194,7 @@ module ImportScripts::PhpBB3
def fix_markdown(text)
text.gsub!(/(\n*\[\/?quote.*?\]\n*)/mi) { |q| "\n#{q.strip}\n" }
text.gsub!(/^!\[[^\]]*\]\([^\]]*\)$/i) { |img| "\n#{img.strip}\n" } # space out images single on line
text
end
end

View File

@ -0,0 +1,817 @@
# frozen_string_literal: true
require 'rails_helper'
require Rails.root.join('script/import_scripts/phpbb3/support/bbcode/xml_to_markdown')
RSpec.describe ImportScripts::PhpBB3::BBCode::XmlToMarkdown do
def convert(xml, opts = {})
described_class.new(xml, opts).convert
end
it "converts unformatted text" do
xml = '<t>unformatted text</t>'
expect(convert(xml)).to eq('unformatted text')
end
it "converts nested formatting" do
xml = '<r><I><s>[i]</s>this is italic<B><s>[b]</s> and bold<e>[/b]</e></B> text<e>[/i]</e></I></r>'
expect(convert(xml)).to eq('_this is italic **and bold** text_')
end
context "bold text" do
it "converts bold text" do
xml = '<r><B><s>[b]</s>this is bold text<e>[/b]</e></B></r>'
expect(convert(xml)).to eq('**this is bold text**')
end
it "converts multi-line bold text" do
xml = <<~XML
<r><B><s>[b]</s>this is bold text<br/>
on two lines<e>[/b]</e></B><br/>
<br/>
<B><s>[b]</s>this is bold text<br/>
<br/>
<br/>
with two empty lines<e>[/b]</e></B></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
**this is bold text
on two lines**
**this is bold text\\
\\
\\
with two empty lines**
MD
end
it "ignores duplicate bold text" do
xml = '<r><B><s>[b]</s><B><s>[b]</s>this is bold text<e>[/b]</e></B><e>[/b]</e></B></r>'
expect(convert(xml)).to eq('**this is bold text**')
end
end
context "italic text" do
it "converts italic text" do
xml = '<r><I><s>[i]</s>this is italic text<e>[/i]</e></I></r>'
expect(convert(xml)).to eq('_this is italic text_')
end
it "converts multi-line italic text" do
xml = <<~XML
<r><I><s>[i]</s>this is italic text<br/>
on two lines<e>[/i]</e></I><br/>
<br/>
<I><s>[i]</s>this is italic text<br/>
<br/>
<br/>
with two empty lines<e>[/i]</e></I></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
_this is italic text
on two lines_
_this is italic text\\
\\
\\
with two empty lines_
MD
end
it "ignores duplicate italic text" do
xml = '<r><I><s>[i]</s><I><s>[i]</s>this is italic text<e>[/i]</e></I><e>[/i]</e></I></r>'
expect(convert(xml)).to eq('_this is italic text_')
end
end
context "underlined text" do
it "converts underlined text" do
xml = '<r><U><s>[u]</s>this is underlined text<e>[/u]</e></U></r>'
expect(convert(xml)).to eq('[u]this is underlined text[/u]')
end
it "converts multi-line underlined text" do
xml = <<~XML
<r><U><s>[u]</s>this is underlined text<br/>
on two lines<e>[/u]</e></U><br/>
<br/>
<U><s>[u]</s>this is underlined text<br/>
<br/>
<br/>
with two empty lines<e>[/u]</e></U></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
[u]this is underlined text
on two lines[/u]
[u]this is underlined text\\
\\
\\
with two empty lines[/u]
MD
end
it "ignores duplicate underlined text" do
xml = '<r><U><s>[u]</s><U><s>[u]</s>this is underlined text<e>[/u]</e></U><e>[/u]</e></U></r>'
expect(convert(xml)).to eq('[u]this is underlined text[/u]')
end
end
context "code blocks" do
context "inline code blocks enabled" do
let(:opts) { { allow_inline_code: true } }
it "converts single line code blocks" do
xml = '<r><CODE><s>[code]</s>one line of code<e>[/code]</e></CODE></r>'
expect(convert(xml, opts)).to eq('`one line of code`')
end
end
context "inline code blocks disabled" do
it "converts single line code blocks" do
xml = '<r>foo <CODE><s>[code]</s>some code<e>[/code]</e></CODE> bar</r>'
expect(convert(xml)).to eq(<<~MD.chomp)
foo
```text
some code
```
bar
MD
end
end
it "converts multi-line code blocks" do
xml = <<~XML
<r><CODE><s>[code]</s><i>
</i> /\_/\
( o.o )
&gt; ^ &lt;
<e>[/code]</e></CODE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
```text
/\_/\
( o.o )
> ^ <
```
MD
end
it "adds leading and trailing linebreaks to code blocks" do
xml = <<~XML
<r>text before code block<br/>
<CODE><s>[code]</s><i>
</i>foo
bar
<e>[/code]</e></CODE>
text after code block</r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
text before code block
```text
foo
bar
```
text after code block
MD
end
end
context "lists" do
it "converts unordered lists" do
xml = <<~XML
<r><LIST><s>[list]</s>
<LI><s>[*]</s>Red</LI>
<LI><s>[*]</s>Blue</LI>
<LI><s>[*]</s>Yellow</LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
* Red
* Blue
* Yellow
MD
end
it "converts ordered lists" do
xml = <<~XML
<r><LIST type="decimal"><s>[list=1]</s>
<LI><s>[*]</s>Go to the shops</LI>
<LI><s>[*]</s>Buy a new computer</LI>
<LI><s>[*]</s>Swear at computer when it crashes</LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
1. Go to the shops
2. Buy a new computer
3. Swear at computer when it crashes
MD
end
it "converts all types of ordered lists into regular ordered lists" do
xml = <<~XML
<r><LIST type="upper-alpha"><s>[list=A]</s>
<LI><s>[*]</s>The first possible answer</LI>
<LI><s>[*]</s>The second possible answer</LI>
<LI><s>[*]</s>The third possible answer</LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
1. The first possible answer
2. The second possible answer
3. The third possible answer
MD
end
it "adds leading and trailing linebreaks to lists if needed" do
xml = <<~XML
<r>foo
<LIST><s>[list]</s>
<LI><s>[*]</s>Red</LI>
<LI><s>[*]</s>Blue</LI>
<LI><s>[*]</s>Yellow</LI>
<e>[/list]</e></LIST>
bar</r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
foo
* Red
* Blue
* Yellow
bar
MD
end
it "converts nested lists" do
xml = <<~XML
<r><LIST><s>[list]</s>
<LI><s>[*]</s>Option 1
<LIST><s>[list]</s>
<LI><s>[*]</s>Option 1.1</LI>
<LI><s>[*]</s>Option 1.2</LI>
<e>[/list]</e></LIST></LI>
<LI><s>[*]</s>Option 2
<LIST><s>[list]</s>
<LI><s>[*]</s>Option 2.1
<LIST type="decimal"><s>[list=1]</s>
<LI><s>[*]</s> Red</LI>
<LI><s>[*]</s> Blue</LI>
<e>[/list]</e></LIST></LI>
<LI><s>[*]</s>Option 2.2</LI>
<e>[/list]</e></LIST></LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
* Option 1
* Option 1.1
* Option 1.2
* Option 2
* Option 2.1
1. Red
2. Blue
* Option 2.2
MD
end
it "handles nested elements and linebreaks in list items" do
xml = <<~XML
<r><LIST><s>[list]</s><LI><s>[*]</s>some text <B><s>[b]</s><I><s>[i]</s>foo<e>[/i]</e></I><e>[/b]</e></B><br/>
or <B><s>[b]</s><I><s>[i]</s>bar<e>[/i]</e></I><e>[/b]</e></B> more text</LI><e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
* some text **_foo_**
or **_bar_** more text
MD
end
end
context "images" do
it "converts image" do
xml = <<~XML
<r><IMG src="https://example.com/foo.png"><s>[img]</s>
<URL url="https://example.com/foo.png">
<LINK_TEXT text="https://example.com/foo.png">https://example.com/foo.png</LINK_TEXT>
</URL><e>[/img]</e></IMG></r>
XML
expect(convert(xml)).to eq('![](https://example.com/foo.png)')
end
it "converts image with link" do
xml = <<~XML
<r><URL url="https://example.com/"><s>[url=https://example.com/]</s>
<IMG src="https://example.com/foo.png"><s>[img]</s>
<LINK_TEXT text="https://example.com/foo.png">https://example.com/foo.png</LINK_TEXT>
<e>[/img]</e></IMG><e>[/url]</e></URL></r>
XML
expect(convert(xml)).to eq('[![](https://example.com/foo.png)](https://example.com/)')
end
end
context "links" do
it "converts links created without BBCode" do
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara">https://en.wikipedia.org/wiki/Capybara</URL></r>'
expect(convert(xml)).to eq('https://en.wikipedia.org/wiki/Capybara')
end
it "converts links created with BBCode" do
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara"><s>[url]</s>https://en.wikipedia.org/wiki/Capybara<e>[/url]</e></URL></r>'
expect(convert(xml)).to eq('https://en.wikipedia.org/wiki/Capybara')
end
it "converts links with link text" do
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara"><s>[url=https://en.wikipedia.org/wiki/Capybara]</s>Capybara<e>[/url]</e></URL></r>'
expect(convert(xml)).to eq('[Capybara](https://en.wikipedia.org/wiki/Capybara)')
end
it "converts internal links" do
opts = {
url_replacement: lambda do |url|
if url == 'http://forum.example.com/viewtopic.php?f=2&t=2'
'https://discuss.example.com/t/welcome-topic/18'
end
end
}
xml = '<r><URL url="http://forum.example.com/viewtopic.php?f=2&amp;t=2"><LINK_TEXT text="viewtopic.php?f=2&amp;t=2">http://forum.example.com/viewtopic.php?f=2&amp;t=2</LINK_TEXT></URL></r>'
expect(convert(xml, opts)).to eq('https://discuss.example.com/t/welcome-topic/18')
end
it "converts email links created without BBCode" do
xml = '<r><EMAIL email="foo.bar@example.com">foo.bar@example.com</EMAIL></r>'
expect(convert(xml)).to eq('<foo.bar@example.com>')
end
it "converts email links created with BBCode" do
xml = '<r><EMAIL email="foo.bar@example.com"><s>[email]</s>foo.bar@example.com<e>[/email]</e></EMAIL></r>'
expect(convert(xml)).to eq('<foo.bar@example.com>')
end
it "converts truncated, long links" do
xml = <<~XML
<r><URL url="http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli">
<s>[url]</s><LINK_TEXT text="http://answers.yahoo.com/question/index ... 223AAkkPli">
http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli</LINK_TEXT>
<e>[/url]</e></URL></r>
XML
expect(convert(xml)).to eq('http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli')
end
it "converts BBCodes inside link text" do
xml = <<~XML
<r><URL url="http://example.com"><s>[url=http://example.com]</s>
<B><s>[b]</s>Hello <I><s>[i]</s>world<e>[/i]</e></I>!<e>[/b]</e></B>
<e>[/url]</e></URL></r>
XML
expect(convert(xml)).to eq('[**Hello _world_!**](http://example.com)')
end
end
context "quotes" do
it "converts simple quote" do
xml = <<~XML
<r><QUOTE><s>[quote]</s>Lorem<br/>
ipsum<e>[/quote]</e></QUOTE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
> Lorem
> ipsum
MD
end
it "converts quote with line breaks" do
xml = <<~XML
<r><QUOTE><s>[quote]</s>First paragraph<br/>
<br/>
Second paragraph<br/>
<br/>
<br/>
Third paragraph<e>[/quote]</e></QUOTE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
> First paragraph
>
> Second paragraph
> \\
> \\
> Third paragraph
MD
end
it "converts quote with line breaks and nested formatting" do
xml = <<~XML
<r><QUOTE><s>[quote]</s>
<I><s>[i]</s>this is italic<br/>
<B><s>[b]</s>and bold<br/>
text<br/>
<e>[/b]</e></B> on multiple<br/>
<br/>
<br/>
lines<e>[/i]</e></I>
<e>[/quote]</e></QUOTE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
> _this is italic
> **and bold
> text**
> on multiple\\
> \\
> \\
> lines_
MD
end
it "converts quote with author attribute" do
xml = '<r><QUOTE author="Mr. Blobby"><s>[quote="Mr. Blobby"]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
expect(convert(xml)).to eq(<<~MD.chomp)
[quote="Mr. Blobby"]
Lorem ipsum
[/quote]
MD
end
it "converts quote with author attribute and line breaks" do
xml = <<~XML
<r><QUOTE author="Mr. Blobby"><s>[quote="Mr. Blobby"]</s>First paragraph<br/>
<br/>
Second paragraph<br/>
<br/>
Third paragraph<e>[/quote]</e></QUOTE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
[quote="Mr. Blobby"]
First paragraph
Second paragraph
Third paragraph
[/quote]
MD
end
context "with user_id attribute" do
let(:opts) { { username_from_user_id: lambda { |user_id| user_id == 48 ? "mr_blobby" : nil } } }
it "uses the correct username when the user exists" do
xml = '<r><QUOTE author="Mr. Blobby" user_id="48"><s>[quote="Mr. Blobby" user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
expect(convert(xml, opts)).to eq(<<~MD.chomp)
[quote="mr_blobby"]
Lorem ipsum
[/quote]
MD
end
it "uses the author name when the user does not exist" do
xml = '<r><QUOTE author="Mr. Blobby" user_id="49"><s>[quote="Mr. Blobby" user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
expect(convert(xml, opts)).to eq(<<~MD.chomp)
[quote="Mr. Blobby"]
Lorem ipsum
[/quote]
MD
end
it "creates a blockquote when the user does not exist and the author is missing" do
xml = '<r><QUOTE user_id="49"><s>[quote=user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
expect(convert(xml, opts)).to eq("> Lorem ipsum")
end
end
context "with post_id attribute" do
let(:opts) do
{ quoted_post_from_post_id: lambda { |post_id| { username: 'mr_blobby', post_number: 3, topic_id: 951 } if post_id == 43 } }
end
it "uses information from the quoted post if the post exists" do
xml = <<~XML
<r><QUOTE author="Mr. Blobby" post_id="43" time="1534626128" user_id="48">
<s>[quote="Mr. Blobby" post_id=43 time=1534626128 user_id=48]</s>Lorem ipsum<e>[/quote]</e>
</QUOTE></r>
XML
expect(convert(xml, opts)).to eq(<<~MD.chomp)
[quote="mr_blobby, post:3, topic:951"]
Lorem ipsum
[/quote]
MD
end
it "uses other attributes when post doesn't exist" do
xml = <<~XML
<r><QUOTE author="Mr. Blobby" post_id="44" time="1534626128" user_id="48">
<s>[quote="Mr. Blobby" post_id=44 time=1534626128 user_id=48]</s>Lorem ipsum<e>[/quote]</e>
</QUOTE></r>
XML
expect(convert(xml, opts)).to eq(<<~MD.chomp)
[quote="Mr. Blobby"]
Lorem ipsum
[/quote]
MD
end
end
it "converts nested quotes" do
xml = <<~XML
<r>Multiple nested quotes:<br/>
<QUOTE author="user3">
<s>[quote=user3]</s>
<QUOTE author="user2">
<s>[quote=user2]</s>
<QUOTE author="user1">
<s>[quote=user1]</s>
<B><s>[b]</s>foo <I><s>[i]</s>and<e>[/i]</e></I> bar<e>[/b]</e></B>
<e>[/quote]</e>
</QUOTE>
Lorem ipsum
<e>[/quote]</e>
</QUOTE>
nested quotes
<e>[/quote]</e>
</QUOTE>
Text after quotes.
</r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
Multiple nested quotes:
[quote="user3"]
[quote="user2"]
[quote="user1"]
**foo _and_ bar**
[/quote]
Lorem ipsum
[/quote]
nested quotes
[/quote]
Text after quotes.
MD
end
end
it "converts smilies" do
opts = {
smilie_to_emoji: lambda do |smilie|
case smilie
when ':D'
':smiley:'
when ':eek:'
':astonished:'
end
end
}
xml = '<r><E>:D</E> <E>:eek:</E></r>'
expect(convert(xml, opts)).to eq(":smiley: :astonished:")
end
context "attachments" do
it "converts attachments" do
opts = {
upload_md_from_file: lambda do |filename, index|
url = \
case index
when 0 then
"upload://hash2.png"
when 1 then
"upload://hash1.png"
end
"![#{filename}|231x231](#{url})"
end
}
xml = <<~XML
<r>Multiple attachments:
<ATTACHMENT filename="image1.png" index="1"><s>[attachment=1]</s>image1.png<e>[/attachment]</e></ATTACHMENT>
This is an inline image.<br/>
<br/>
And another one:
<ATTACHMENT filename="image2.png" index="0"><s>[attachment=0]</s>image2.png<e>[/attachment]</e></ATTACHMENT></r>
XML
expect(convert(xml, opts)).to eq(<<~MD.chomp)
Multiple attachments:
![image1.png|231x231](upload://hash1.png)
This is an inline image.
And another one:
![image2.png|231x231](upload://hash2.png)
MD
end
end
context "line breaks" do
it "converts line breaks" do
xml = <<~XML
<t>Lorem ipsum dolor sit amet.<br/>
<br/>
Consetetur sadipscing elitr.<br/>
<br/>
<br/>
Sed diam nonumy eirmod tempor.<br/>
<br/>
<br/>
<br/>
<br/>
Invidunt ut labore et dolore.</t>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
Lorem ipsum dolor sit amet.
Consetetur sadipscing elitr.
\\
\\
Sed diam nonumy eirmod tempor.
\\
\\
\\
\\
Invidunt ut labore et dolore.
MD
end
# With traditional linebreaks enabled, every <br/> is expected to become a
# backslash linebreak, no matter how many of them follow each other.
it "uses hard linebreaks when traditional line breaks are enabled" do
  xml = <<~XML
    <t>Lorem ipsum dolor sit amet.<br/>
    Consetetur sadipscing elitr.<br/>
    <br/>
    Sed diam nonumy eirmod tempor.<br/>
    <br/>
    <br/>
    <br/>
    Invidunt ut labore et dolore.</t>
  XML
  expect(convert(xml, traditional_linebreaks: true)).to eq(<<~MD.chomp)
    Lorem ipsum dolor sit amet.\\
    Consetetur sadipscing elitr.\\
    \\
    Sed diam nonumy eirmod tempor.\\
    \\
    \\
    \\
    Invidunt ut labore et dolore.
  MD
end
it "uses <br> in front of block elements" do
xml = <<~XML
<r>text before 4 empty lines<br/>
<br/>
<br/>
<br/>
<CODE><s>[code]</s>some code<e>[/code]</e></CODE>
text before 3 empty lines<br/>
<br/>
<br/>
<LIST><s>[list]</s>
<LI><s>[*]</s> item 1</LI>
<LI><s>[*]</s> item 2</LI>
<e>[/list]</e></LIST>
text before 2 empty lines<br/>
<br/>
<LIST><s>[list]</s>
<LI><s>[*]</s> item 1</LI>
<LI><s>[*]</s> item 2</LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
text before 4 empty lines
\\
\\
\\
<br>
```text
some code
```
text before 3 empty lines
\\
\\
<br>
* item 1
* item 2
text before 2 empty lines
\\
<br>
* item 1
* item 2
MD
end
end
context "whitespace" do
it "doesn't strip whitespaces from inline tags" do
xml = <<~XML
<r>Lorem<B><s>[b]</s> ipsum <e>[/b]</e></B>dolor<br/>
<I><s>[i]</s> sit <e>[/i]</e></I>amet,<br/>
consetetur<B><s>[b]</s> sadipscing <e>[/b]</e></B></r>
XML
expect(convert(xml)).to eq(<<~MD.rstrip)
Lorem **ipsum** dolor
_sit_ amet,
consetetur **sadipscing**
MD
end
it "preserves whitespace between tags" do
xml = "<r>foo <B><s>[b]</s>bold<e>[/b]</e></B> <I><s>[i]</s>italic<e>[/i]</e></I> <U><s>[u]</s>underlined<e>[/u]</e></U> bar</r>"
expect(convert(xml)).to eq("foo **bold** _italic_ [u]underlined[/u] bar")
end
end
context "unknown element" do
it "converts an unknown element right below the root element" do
xml = '<r><UNKNOWN><s>[unknown]</s>foo<e>[/unknown]</e></UNKNOWN></r>'
expect(convert(xml)).to eq('foo')
end
it "converts an unknown element inside a known element" do
xml = '<r><B><s>[b]</s><UNKNOWN><s>[unknown]</s>bar<e>[/unknown]</e></UNKNOWN><e>[/b]</e></B></r>'
expect(convert(xml)).to eq('**bar**')
end
end
context "font size" do
it "converts sizes to either <small> or <big>" do
xml = <<~XML
<r><SIZE size="50"><s>[size=50]</s>very small<e>[/size]</e></SIZE><br/>
<SIZE size="85"><s>[size=85]</s>small<e>[/size]</e></SIZE><br/>
<SIZE size="150"><s>[size=150]</s>large<e>[/size]</e></SIZE><br/>
<SIZE size="200"><s>[size=200]</s>very large<e>[/size]</e></SIZE></r>
XML
expect(convert(xml)).to eq(<<~MD.rstrip)
<small>very small</small>
<small>small</small>
<big>large</big>
<big>very large</big>
MD
end
it "ignores invalid sizes" do
xml = <<~XML
<r><SIZE size="-50"><s>[size=-50]</s>negative number<e>[/size]</e></SIZE><br/>
<SIZE size="0"><s>[size=0]</s>zero<e>[/size]</e></SIZE><br/>
<SIZE size="300"><s>[size=300]</s>too large<e>[/size]</e></SIZE><br/>
<SIZE size="abc"><s>[size=abc]</s>not a number<e>[/size]</e></SIZE><br/>
<SIZE><s>[size]</s>no size<e>[/size]</e></SIZE></r>
XML
expect(convert(xml)).to eq(<<~MD.rstrip)
negative number
zero
too large
not a number
no size
MD
end
end
end