2017-12-27 00:56:36 -05:00
|
|
|
# coding: utf-8
|
2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2015-03-14 20:57:46 -04:00
|
|
|
require 'mysql2'
|
2014-07-16 13:59:30 -04:00
|
|
|
require File.expand_path(File.dirname(__FILE__) + '/base.rb')
|
|
|
|
|
|
|
|
require 'htmlentities'
|
|
|
|
require 'tsort'
|
|
|
|
require 'set'
|
|
|
|
require 'optparse'
|
2014-07-16 17:18:52 -04:00
|
|
|
require 'etc'
|
2015-01-13 10:43:09 -05:00
|
|
|
require 'open3'
|
2014-07-16 13:59:30 -04:00
|
|
|
|
|
|
|
class ImportScripts::Smf2 < ImportScripts::Base
|
|
|
|
|
|
|
|
# Command-line entry point.
#
# Parses the options; on failure prints a diagnostic to stderr and exits with
# status 1. Otherwise builds an importer from the options and calls #perform.
def self.run
  opts = Options.new

  begin
    opts.parse!
  rescue Options::SettingsError => e
    # Must come first: SettingsError is a subclass of Options::Error.
    $stderr.puts "Cannot load SMF settings: #{e.message}"
    exit 1
  rescue Options::Error => e
    $stderr.puts e.to_s.capitalize, opts.usage
    exit 1
  end

  new(opts).perform
end
|
|
|
|
|
|
|
|
attr_reader :options
|
|
|
|
|
|
|
|
# Validates the parsed options and opens the default database connection.
#
# Exits with status 1 (after printing a message to stderr) when no timezone
# is available, when the timezone name is rejected by Time.zone=, or when no
# database name was given. When the password is the :ask marker it is read
# interactively without echo.
#
# @param options [Options] parsed command-line options
def initialize(options)
  if options.timezone.nil?
    $stderr.puts "No source timezone given and autodetection from PHP failed.",
                 "Use -t option to specify correct source timezone:",
                 options.usage
    exit 1
  end

  super()
  @options = options

  begin
    Time.zone = options.timezone
  rescue ArgumentError
    $stderr.puts "Timezone name '#{options.timezone}' is invalid."
    exit 1
  end

  if options.database.blank?
    $stderr.puts "No database name given.", options.usage
    exit 1
  end

  if options.password == :ask
    # Loaded lazily: only needed for the interactive prompt.
    require 'highline'
    $stderr.print "Enter password for MySQL database `#{options.database}`: "
    options.password = HighLine.new.ask('') { |q| q.echo = false }
  end

  @default_db_connection = create_db_connection
end
|
|
|
|
|
|
|
|
# Runs every import step in order: groups, users, categories, posts, the
# post-processing pass, and finally the prettyurl permalinks under '/forum'.
def execute
  %i[import_groups import_users import_categories import_posts postprocess_posts].each do |step|
    send(step)
  end

  make_prettyurl_permalinks('/forum')
end
|
|
|
|
|
|
|
|
# Imports member groups from the {prefix}membergroups table.
#
# Only rows with min_posts = -1 and group_type 1 or 2 are selected; each row
# (id, name) is passed unchanged to create_groups.
# NOTE(review): presumably min_posts = -1 excludes post-count based groups --
# confirm against the SMF schema.
def import_groups
  puts '', 'creating groups'

  total = query(<<-SQL, as: :single)
    SELECT COUNT(*) FROM {prefix}membergroups
    WHERE min_posts = -1 AND group_type IN (1, 2)
  SQL

  create_groups(query(<<-SQL), total: total) { |group| group }
    SELECT id_group AS id, group_name AS name
    FROM {prefix}membergroups
    WHERE min_posts = -1 AND group_type IN (1, 2)
  SQL
end
|
|
|
|
|
|
|
|
# Group ids as they appear in the source data.
# GUEST_GROUP and MEMBER_GROUP are checked against a board's member_groups to
# decide whether the category is read restricted; ADMIN_GROUP and
# MODERATORS_GROUP are checked against a member's groups to set the
# admin / moderator flags.
GUEST_GROUP = -1
MEMBER_GROUP = 0
ADMIN_GROUP = 1
MODERATORS_GROUP = 2
|
|
|
|
|
|
|
|
# Imports every row of {prefix}members as a user.
#
# Each member row is left-joined with {prefix}attachments on id_member so an
# avatar attachment (if any) is available. For every member the block returns
# the attribute hash handed to create_users; its post_create_action
#   * moves created_at back when the source registration date is older,
#   * adds the user to every imported group the member belonged to,
#   * imports the avatar when an SMF root was given, the member has an
#     attachment and the user has no uploaded avatar yet.
def import_users
  puts '', 'creating users'
  total = query("SELECT COUNT(*) FROM {prefix}members", as: :single)

  create_users(query(<<-SQL), total: total) do |member|
    SELECT a.id_member, a.member_name, a.date_registered, a.real_name, a.email_address,
           CONCAT(LCASE(a.member_name),':', a.passwd) AS password,
           a.is_activated, a.last_login, a.birthdate, a.member_ip, a.id_group, a.additional_groups,
           b.id_attach, b.file_hash, b.filename
    FROM {prefix}members AS a
    LEFT JOIN {prefix}attachments AS b ON a.id_member = b.id_member
  SQL
    group_ids = [ member[:id_group], *member[:additional_groups].split(',').map(&:to_i) ]

    # Best effort: unparsable values fall back instead of aborting the import.
    create_time = Time.zone.at(member[:date_registered]) rescue Time.now
    last_seen_time = Time.zone.at(member[:last_login]) rescue nil
    ip_addr = IPAddr.new(member[:member_ip]) rescue nil

    {
      id: member[:id_member],
      username: member[:member_name],
      password: member[:password],
      created_at: create_time,
      name: member[:real_name],
      email: member[:email_address],
      active: member[:is_activated] == 1,
      approved: member[:is_activated] == 1,
      last_seen_at: last_seen_time,
      date_of_birth: member[:birthdate],
      ip_address: ip_addr,
      admin: group_ids.include?(ADMIN_GROUP),
      moderator: group_ids.include?(MODERATORS_GROUP),

      post_create_action: proc do |user|
        user.update(created_at: create_time) if create_time < user.created_at
        user.save

        GroupUser.transaction do
          group_ids.each do |gid|
            # Groups that were not imported have no mapping and are skipped.
            group_id = group_id_from_imported_group_id(gid)
            GroupUser.find_or_create_by(user: user, group_id: group_id) if group_id
          end
        end

        if options.smfroot && member[:id_attach].present? && user.uploaded_avatar_id.blank?
          path = find_smf_attachment_path(member[:id_attach], member[:file_hash], member[:filename])
          if path
            begin
              upload = create_upload(user.id, path, member[:filename])
              user.update(uploaded_avatar_id: upload.id) if upload.persisted?
            rescue SystemCallError => err
              puts "Could not import avatar: #{err.message}"
            end
          end
        end
      end
    }
  end
end
|
|
|
|
|
|
|
|
# Imports every row of {prefix}boards as a category.
#
# Boards are read ordered by id_parent, then id_board. A board whose
# member_groups list contains neither GUEST_GROUP nor MEMBER_GROUP is treated
# as restricted: after creation the category is marked read_restricted and
# every listed group that maps to an imported group gets full permission.
# When a category with the same name already exists, the board id is appended
# to the name.
def import_categories
  create_categories(query(<<-SQL)) do |board|
    SELECT id_board, id_parent, name, description, member_groups
    FROM {prefix}boards
    ORDER BY id_parent ASC, id_board ASC
  SQL
    parent_id = category_id_from_imported_category_id(board[:id_parent]) if board[:id_parent] > 0
    groups = (board[:member_groups] || "").split(/,/).map(&:to_i)
    restricted = !groups.include?(GUEST_GROUP) && !groups.include?(MEMBER_GROUP)

    if Category.find_by_name(board[:name])
      board[:name] += board[:id_board].to_s
    end

    {
      id: board[:id_board],
      name: board[:name],
      description: board[:description],
      parent_category_id: parent_id,
      # Evaluates to false (no action) for unrestricted boards.
      post_create_action: restricted && proc do |category|
        category.update(read_restricted: true)
        groups.each do |imported_group_id|
          group_id = group_id_from_imported_group_id(imported_group_id)
          next unless group_id

          CategoryGroup.find_or_create_by(category: category, group_id: group_id) do |cg|
            cg.permission_type = CategoryGroup.permission_types[:full]
          end
        end
      end,
    }
  end
end
|
|
|
|
|
|
|
|
# Imports every row of {prefix}messages as a post.
#
# Messages are read ordered by topic and message id, together with their
# board, the topic's first message id and the number of type-0 attachments.
# The first message of a topic creates the topic (category + title); every
# other message is attached to the topic of the already imported first
# message, and is skipped with a notice when that topic cannot be found.
# Attachments are imported and referenced from the converted body.
def import_posts
  puts '', 'creating posts'
  total = query("SELECT COUNT(*) FROM {prefix}messages", as: :single)

  # Makes PostCreator use a guardian for the system user when the
  # :import_mode option is set.
  PostCreator.class_eval do
    def guardian
      @guardian ||= if opts[:import_mode]
        @@system_guardian ||= Guardian.new(Discourse.system_user)
      else
        Guardian.new(@user)
      end
    end
  end

  # Separate connection for the per-message attachment lookups issued while
  # the main result set is still being iterated (query streams rows unless
  # :as is given).
  db2 = create_db_connection

  create_posts(query(<<-SQL), total: total) do |message|
    SELECT m.id_msg, m.id_topic, m.id_member, m.poster_time, m.body,
           m.subject, t.id_board, t.id_first_msg, COUNT(a.id_attach) AS attachment_count
    FROM {prefix}messages AS m
    LEFT JOIN {prefix}topics AS t ON t.id_topic = m.id_topic
    LEFT JOIN {prefix}attachments AS a ON a.id_msg = m.id_msg AND a.attachment_type = 0
    GROUP BY m.id_msg
    ORDER BY m.id_topic ASC, m.id_msg ASC
  SQL
    # Quote conversion is never deferred here, so no post is tagged for the
    # rebake pass and post_create_action is always false.
    ignore_quotes = false

    post = {
      id: message[:id_msg],
      user_id: user_id_from_imported_user_id(message[:id_member]) || -1,
      created_at: Time.zone.at(message[:poster_time]),
      post_create_action: ignore_quotes && proc do |p|
        p.custom_fields['import_rebake'] = 't'
        p.save
      end
    }

    if message[:id_msg] == message[:id_first_msg]
      post[:category] = category_id_from_imported_category_id(message[:id_board])
      post[:title] = decode_entities(message[:subject])
    else
      parent = topic_lookup_from_imported_post_id(message[:id_first_msg])
      unless parent
        puts "Parent post #{message[:id_first_msg]} doesn't exist. Skipping #{message[:id_msg]}: #{message[:subject][0..40]}"
        next nil
      end
      post[:topic_id] = parent[:topic_id]
    end

    attachments = message[:attachment_count] == 0 ? [] : query(<<-SQL, connection: db2, as: :array)
      SELECT id_attach, file_hash, filename FROM {prefix}attachments
      WHERE attachment_type = 0 AND id_msg = #{message[:id_msg]}
      ORDER BY id_attach ASC
    SQL

    # A failed attachment is reported and replaced by nil so positions stay
    # aligned with the [attach=N] numbering.
    attachments.map! { |a| import_attachment(post, a) rescue (puts $! ; nil) }
    post[:raw] = convert_message_body(message[:body], attachments, ignore_quotes: ignore_quotes)
    next post
  end
end
|
|
|
|
|
2014-07-30 00:18:52 -04:00
|
|
|
# Creates an upload for one attachment row of a post.
#
# @param post [Hash] post attributes; :id and :user_id are read
# @param attachment [Hash] row with :id_attach, :file_hash and :filename
# @return the persisted upload
# @raise [RuntimeError] when the file cannot be located, the upload is not
#   persisted, or a SystemCallError occurs
def import_attachment(post, attachment)
  failure = "Attachment for post #{post[:id]} failed: "

  file = find_smf_attachment_path(attachment[:id_attach], attachment[:file_hash], attachment[:filename])
  raise "#{failure}#{attachment[:filename]}" unless file.present?

  created = create_upload(post[:user_id], file, attachment[:filename])
  raise "#{failure}#{created.errors.full_messages.join(', ')}" unless created.persisted?

  created
rescue SystemCallError => e
  raise "Attachment for post #{post[:id]} failed: #{e.message}"
end
|
|
|
|
|
2014-07-16 15:40:52 -04:00
|
|
|
# Re-converts and rebakes every post tagged with the 'import_rebake' custom
# field, removing the tag once the post has been processed.
def postprocess_posts
  puts '', 'rebaking posts'

  flagged = PostCustomField.where(name: 'import_rebake', value: 't')
  total = flagged.count

  flagged.each_with_index do |field, index|
    post = field.post

    # Conversion, rebake and tag removal succeed or fail together.
    Post.transaction do
      post.raw = convert_bbcode(post.raw)
      post.rebake!
      post.save
      field.destroy!
    end

    print_status(index + 1, total)
  end
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
2014-07-29 16:55:38 -04:00
|
|
|
# Opens a new MySQL connection using host, username, password and database
# from the parsed options.
def create_db_connection
  Mysql2::Client.new(
    host: options.host,
    username: options.username,
    password: options.password,
    database: options.database
  )
end
|
|
|
|
|
2014-07-16 13:59:30 -04:00
|
|
|
# Runs an SQL statement through __query.
#
# @param sql [String] statement; '{prefix}' is expanded by __query
# @param opts [Hash] :connection selects the connection (default: the one
#   opened by the constructor); :as selects the result shape
# @return with as: :array the rows as an Array; with as: :single the first
#   column of the first row, or nil when the result set is empty; otherwise
#   the streamed result (after iterating it when a block is given)
def query(sql, **opts, &block)
  db = opts[:connection] || @default_db_connection

  case opts[:as]
  when :array
    __query(db, sql).to_a
  when :single
    # An empty result set yields nil instead of raising NoMethodError.
    first_row = __query(db, sql, as: :array).first
    first_row && first_row[0]
  else
    rows = __query(db, sql, stream: true)
    block ? rows.each(&block) : rows
  end
end
|
|
|
|
|
2014-07-29 16:55:38 -04:00
|
|
|
# Executes sql on db after replacing every '{prefix}' with the configured
# table prefix. Rows use symbol keys and are not cached unless opts override
# those defaults.
def __query(db, sql, **opts)
  defaults = { symbolize_keys: true, cache_rows: false }
  db.query(sql.gsub('{prefix}', options.prefix), defaults.merge(opts))
end
|
|
|
|
|
2014-07-29 22:56:18 -04:00
|
|
|
# Pairs of [accented character, ASCII replacement] applied to attachment file
# names when rebuilding the legacy on-disk name. Frozen because it is shared,
# read-only data.
TRTR_TABLE = begin
  from = "ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ"
  to   = "SZszYAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy"
  from.chars.zip(to.chars).freeze
end
|
|
|
|
|
2014-07-16 13:59:30 -04:00
|
|
|
# Locates an attachment file below "<smfroot>/attachments".
#
# Three candidate names are tried in order: the original file name,
# "<id>_<file_hash>", and the legacy name built from the id, the cleaned file
# name (dots replaced by underscores) and the MD5 hex digest of the cleaned
# name.
#
# @param attachment_id [Integer] id of the attachment row
# @param file_hash [String] hash stored with the attachment
# @param filename [String] original file name
# @return [String, nil] path of the first existing candidate, or nil
def find_smf_attachment_path(attachment_id, file_hash, filename)
  # dup: the name is modified in place below.
  cleaned_name = filename.dup
  TRTR_TABLE.each { |from, to| cleaned_name.gsub!(from, to) }
  cleaned_name.gsub!(/\s/, '_')
  cleaned_name.gsub!(/[^\w_\.\-]/, '')
  legacy_name = "#{attachment_id}_#{cleaned_name.gsub('.', '_')}#{Digest::MD5.hexdigest(cleaned_name)}"

  [filename, "#{attachment_id}_#{file_hash}", legacy_name]
    .map { |name| File.join(options.smfroot, 'attachments', name) }
    .find { |file| File.exist?(file) } # File.exists? no longer exists in Ruby 3.2+
end
|
|
|
|
|
|
|
|
# Decodes HTML entities using a memoized HTMLEntities instance; all arguments
# are forwarded to its #decode.
def decode_entities(*args)
  @html_entities ||= HTMLEntities.new
  @html_entities.decode(*args)
end
|
|
|
|
|
|
|
|
# Converts an SMF message body into the raw text stored for the post.
#
# Steps, in order: "<br />" becomes a newline and HTML entities are decoded;
# [color] tags are unwrapped; [list] becomes [ol] (type=decimal) or [ul];
# consecutive "[x] item" lines become a [ul] of [li] items; [attach...=N]
# tags are replaced by the markup of the N-th attachment (1-based); every
# attachment that was never referenced is appended after a "---" separator;
# finally the text goes through convert_bbcode unless opts[:ignore_quotes]
# is set.
#
# @param body [String] message body as stored by SMF
# @param attachments [Array] uploads in attachment order; nil entries mark
#   attachments that could not be imported
# @param opts [Hash] :ignore_quotes skips the BBCode conversion when truthy
# @return [String] converted body
def convert_message_body(body, attachments = [], **opts)
  body = decode_entities(body.gsub(/<br\s*\/>/, "\n"))
  body.gsub!(ColorPattern, '\k<inner>')

  body.gsub!(ListPattern) do
    match = Regexp.last_match
    params = parse_tag_params(match[:params])
    tag = params['type'] == 'decimal' ? 'ol' : 'ul'
    "\n[#{tag}]#{match[:inner].strip}[/#{tag}]\n"
  end

  body.gsub!(XListPattern) do |block|
    # One [li] per "[x] ..." line; the items must be part of the replacement.
    items = block.lines.map { |line| "[li]#{line.strip.sub(/^\[x\]\s*/, '')}[/li]" }
    "\n[ul]#{items.join}[/ul]\n"
  end

  if attachments && !attachments.empty?
    use_count = Hash.new(0)

    AttachmentPatterns.each do |pattern, emitter|
      body.gsub!(pattern) do |tag|
        index = Regexp.last_match[:num].to_i - 1
        upload = index >= 0 ? attachments[index] : nil
        next tag unless upload # unknown or failed attachment: keep the tag

        use_count[index] += 1
        # The emitters call get_upload_markdown, so they run on this object.
        instance_exec(upload, &emitter)
      end
    end

    unused = attachments.each_with_index.select { |upload, index| upload && use_count[index] == 0 }
    unless unused.empty?
      body << "\n\n---"
      unused.each { |upload, _index| body << "\n\n#{get_upload_markdown(upload)}" }
    end
  end

  opts[:ignore_quotes] ? body : convert_bbcode(body)
end
|
|
|
|
|
2014-07-30 00:18:52 -04:00
|
|
|
# Returns the markup for an upload as produced by html_for_upload, labelled
# with the upload's original file name.
def get_upload_markdown(upload)
  label = upload.original_filename
  html_for_upload(upload, label)
end
|
|
|
|
|
2014-07-16 13:59:30 -04:00
|
|
|
# Rewrites [quote] blocks, innermost ones included (the method recurses on
# the quoted text).
#
# A quote with an author becomes [quote="AUTHOR ..."]; when its link parameter
# matches QuoteParamsPattern and the quoted message was imported, the post
# number and topic id are added. A quote without an author becomes a
# <blockquote>.
def convert_quotes(body)
  body.to_s.gsub(QuotePattern) do
    found = Regexp.last_match
    inner = found[:inner].strip
    params = parse_tag_params(found[:params])
    author = params['author']

    next "<blockquote>#{convert_quotes(inner)}</blockquote>" unless author.present?

    header = +"\n[quote=\"#{author}"
    link = QuoteParamsPattern.match(params['link'])
    if link
      tl = topic_lookup_from_imported_post_id(link[:msg].to_i)
      header << " post:#{tl[:post_number]}, topic:#{tl[:topic_id]}" if tl
    end

    "#{header}\"]\n#{convert_quotes(inner)}\n[/quote]"
  end
end
|
|
|
|
|
2020-03-11 11:19:59 -04:00
|
|
|
# BBCode tags whose opening and closing markers are dropped by convert_bbcode
# while the enclosed text is kept.
IGNORED_BBCODE ||= %w{
  black blue center color email flash font glow green iurl left list move red
  right shadow size table time white
}.freeze
|
|
|
|
|
2020-02-24 13:51:45 -05:00
|
|
|
# Converts SMF BBCode into the markup stored in the post.
#
# Quotes are converted first (convert_quotes); the remaining tags are then
# rewritten one after another, in the order below. The tags listed in
# IGNORED_BBCODE are stripped while their content is kept.
#
# @param raw [String, nil] text to convert
# @return [String] converted text; "" when raw is blank
def convert_bbcode(raw)
  return "" if raw.blank?

  text = convert_quotes(raw)

  # [acronym]
  text.gsub!(/\[acronym=([^\]]+)\](.*?)\[\/acronym\]/im, '<abbr title="\1">\2</abbr>')

  # [br] and literal <br> tags
  text.gsub!(/\[br\]/i, "\n")
  text.gsub!(/<br\s*\/?>/i, "\n")

  # [hr]
  text.gsub!(/\[hr\]/i, "<hr/>")

  # [sub] / [sup]
  text.gsub!(/\[sub\](.*?)\[\/sub\]/im, '<sub>\1</sub>')
  text.gsub!(/\[sup\](.*?)\[\/sup\]/im, '<sup>\1</sup>')

  # [html] and [php] become fenced code blocks tagged with the language
  text.gsub!(/\[html\]/i, "\n```html\n")
  text.gsub!(/\[\/html\]/i, "\n```\n")
  text.gsub!(/\[php\]/i, "\n```php\n")
  text.gsub!(/\[\/php\]/i, "\n```\n")

  # [code] and [pre] become plain fenced code blocks
  text.gsub!(/\[\/?code\]/i, "\n```\n")
  text.gsub!(/\[\/?pre\]/i, "\n```\n")

  # [tt]
  text.gsub!(/\[\/?tt\]/i, "`")

  # [ftp] is rewritten to [url]
  text.gsub!(/\[ftp/i, "[url")
  text.gsub!(/\[\/ftp\]/i, "[/url]")

  # [me]
  text.gsub!(/\[me=([^\]]*)\](.*?)\[\/me\]/im) { "_\\* #{$1} #{$2}_" }

  # [ul] markers are dropped, [li] items become "- " bullets
  text.gsub!(/\[ul\]/i, "")
  text.gsub!(/\[\/ul\]/i, "")
  text.gsub!(/\[li\](.*?)\[\/li\]/im, '- \1')

  # puts [img] and [youtube] content on its own line
  text.gsub!(/\[img[^\]]*\](.*?)\[\/img\]/im, "\n\\1\n")
  text.gsub!(/\[youtube\](.*?)\[\/youtube\]/im, "\n\\1\n")

  IGNORED_BBCODE.each do |code|
    text.gsub!(/\[#{code}[^\]]*\](.*?)\[\/#{code}\]/im, '\1')
  end

  # ensure [/quote] are on their own line
  text.gsub!(/\s*\[\/quote\]\s*/im, "\n[/quote]\n")

  # remove tapatalk mess
  text.gsub!(/Sent from .+? using \[url=.*?\].+?\[\/url\]/i, "")
  text.gsub!(/Sent from .+? using .+?\z/i, "")

  # clean URLs whose label repeats the target
  text.gsub!(/\[url=(.+?)\]\1\[\/url\]/i, '\1')

  text
end
|
|
|
|
|
2014-07-16 13:59:30 -04:00
|
|
|
# Collects the ids of all messages quoted in a body.
#
# Every "[quote ...]" tag whose link parameter matches QuoteParamsPattern
# contributes the message id from that link.
#
# @param body [String] message body containing BBCode
# @return [Set<Integer>] ids of the quoted messages (empty when none)
def extract_quoted_message_ids(body)
  Set.new.tap do |quoted|
    # scan yields a one-element array per match, hence the destructuring.
    body.scan(/\[quote\s+([^\]]+)\s*\]/) do |(raw_params)|
      link = parse_tag_params(raw_params)['link']
      match = link && QuoteParamsPattern.match(link)
      quoted << match[:msg].to_i if match
    end
  end
end
|
|
|
|
|
|
|
|
# Parses the parameter list of a BBCode tag into a Hash.
#
#   param1=value1=still1 value1 param2=value2 ...
#   => {'param1' => 'value1=still1 value1', 'param2' => 'value2 ...'}
#
# A value runs until the next "name=" that follows whitespace, so it may
# contain spaces and '=' signs. When a name occurs twice the last value wins.
#
# @param params [#to_s] raw parameter text (nil yields an empty Hash)
# @return [Hash{String => String}]
def parse_tag_params(params)
  params.to_s.strip.scan(/(?<param>\w+)=(?<value>(?:(?>\S+)|\s+(?!\w+=))*)/).to_h
end
|
|
|
|
|
|
|
|
class << self
  private

  # Builds a regex matching a BBCode tag pair, including nested pairs of the
  # same tag.
  #
  #   [tag param=value param2=value2]
  #     text
  #     [tag nested=true]text[/tag]
  #   [/tag]
  #   => match[:params] == 'param=value param2=value2'
  #      match[:inner] == "\n  text\n  [tag nested=true]text[/tag]\n"
  #
  # ltag is the opening tag name; rtag is the closing tag text and defaults
  # to "/" followed by ltag. Both are interpolated into the pattern as given.
  def build_nested_tag_regex(ltag, rtag = nil)
    rtag ||= "/#{ltag}"
    %r{
      \[#{ltag}(?-x:[ =](?<params>[^\]]*))?\]   # consume open tag, followed by...
        (?<inner>(?:
          (?> [^\[]+ )                      # non-tags, or...
          |
          \[(?! #{ltag}(?-x:[ =][^\]]*)?\] | #{rtag}\]) # different tags, or ...
          |
          (?<re>                            # recursively matched tags of the same kind
            \[#{ltag}(?-x:[ =][^\]]*)?\]
              (?:
                (?> [^\[]+ )
                |
                \[(?! #{ltag}(?-x:[ =][^\]]*)?\] | #{rtag}\])
                |
                \g<re>                      # recursion here
              )*
            \[#{rtag}\]
          )
        )*)
      \[#{rtag}\]
    }x
  end
end
|
|
|
|
|
|
|
|
# Link parameter of a quote: "topic=<topic>.msg<msg>#msg<msg>". The dot is
# escaped so that only a literal '.' separates the topic id from the message.
QuoteParamsPattern = /^topic=(?<topic>\d+)\.msg(?<msg>\d+)#msg\k<msg>$/

# One or more consecutive lines starting with "[x]".
XListPattern = /(?<xblock>(?>^\[x\]\s*(?<line>.*)$\n?)+)/

# Possibly nested [quote], [color] and [list] tag pairs.
QuotePattern = build_nested_tag_regex('quote')
ColorPattern = build_nested_tag_regex('color')
ListPattern = build_nested_tag_regex('list')

# [pattern, emitter] pairs for attachment tags. The first pattern matches a
# tag that fills a whole line and pads the result with newlines; the second
# matches a tag anywhere. Emitters are run with instance_exec so they can
# call get_upload_markdown.
AttachmentPatterns = [
  [/^\[attach(?:|img|url|mini)=(?<num>\d+)\]$/, ->(u) { "\n" + get_upload_markdown(u) + "\n" }],
  [/\[attach(?:|img|url|mini)=(?<num>\d+)\]/, ->(u) { get_upload_markdown(u) }]
].freeze
|
|
|
|
|
2014-07-16 13:59:30 -04:00
|
|
|
# Provides command line options and parses the SMF settings file.
#
# Usage is either "<SMFROOT> [options]" (database settings are then read from
# <SMFROOT>/Settings.php, with command-line values taking precedence) or
# "-d <DATABASE> [options]".
class Options
  # Raised for invalid command lines.
  class Error < StandardError; end

  # Raised when Settings.php cannot be read and no database name is known.
  class SettingsError < Error; end

  DEFAULT_HOST = 'localhost'
  DEFAULT_PREFIX = 'smf_'

  attr_accessor :host
  attr_accessor :username
  attr_accessor :password
  attr_accessor :database
  attr_accessor :prefix
  attr_accessor :smfroot
  attr_accessor :timezone

  # Parses the given arguments (consuming the recognised options from args)
  # and fills in defaults for everything left unset.
  #
  # @param args [Array<String>] command-line arguments, ARGV by default
  # @raise [Error] on an empty, unparsable or over-long argument list
  # @raise [SettingsError] when Settings.php cannot be read and no database
  #   name was given
  def parse!(args = ARGV)
    raise Error, 'not enough arguments' if args.empty?

    begin
      parser.parse!(args)
    rescue OptionParser::ParseError => err
      raise Error, err.message
    end

    # At most one positional argument (the SMF root) may remain.
    raise Error, 'too many arguments' if args.length > 1

    self.smfroot = args.first
    read_smf_settings if smfroot

    self.host ||= DEFAULT_HOST
    self.username ||= Etc.getlogin
    self.prefix ||= DEFAULT_PREFIX
    self.timezone ||= get_php_timezone
  end

  # @return [String] usage banner followed by the option summary
  def usage
    parser.to_s
  end

  private

  # Asks the PHP CLI ("php -i") for its default timezone.
  #
  # @return [String, nil] the timezone name, or nil when the PHP executable
  #   is missing or its output has no "Default timezone" line
  def get_php_timezone
    phpinfo, _status = Open3.capture2('php', '-i')
    phpinfo.each_line do |line|
      key, value = line.split(' => ').map(&:strip)
      return value if key == 'Default timezone'
    end
    nil
  rescue Errno::ENOENT
    $stderr.puts "Error: PHP CLI executable not found"
    nil
  end

  # Reads the database settings from <smfroot>/Settings.php. Values already
  # set (for example by command-line options) are kept.
  #
  # @raise [SettingsError] when reading fails and no database name is known
  def read_smf_settings
    settings = File.join(smfroot, 'Settings.php')
    File.readlines(settings).each do |line|
      # Matches "$name = 'value';" optionally followed by a # or // comment.
      next unless m = /\$([a-z_]+)\s*=\s*['"](.+?)['"]\s*;\s*((#|\/\/).*)?$/.match(line)

      case m[1]
      when 'db_server' then self.host ||= m[2]
      when 'db_user' then self.username ||= m[2]
      when 'db_passwd' then self.password ||= m[2]
      when 'db_name' then self.database ||= m[2]
      when 'db_prefix' then self.prefix ||= m[2]
      end
    end
  rescue => err
    # A failure is tolerated as long as a database name is already known.
    raise SettingsError, err.message unless database
  end

  # Builds the option parser once and memoizes it.
  def parser
    @parser ||= OptionParser.new(nil, 12) do |o|
      program = File.basename($0)
      o.banner = "Usage:\t#{program} <SMFROOT> [options]\n" \
                 "\t#{program} -d <DATABASE> [options]"

      # The bracketed values show what is used when the option is omitted.
      o.on('-h HOST', :REQUIRED, "MySQL server hostname [\"#{host || DEFAULT_HOST}\"]") { |s| self.host = s }
      o.on('-u USER', :REQUIRED, "MySQL username [\"#{username || Etc.getlogin}\"]") { |s| self.username = s }
      o.on('-p [PASS]', :OPTIONAL, 'MySQL password. Without argument, reads password from STDIN.') { |s| self.password = s || :ask }
      o.on('-d DBNAME', :REQUIRED, 'Name of SMF database') { |s| self.database = s }
      o.on('-f PREFIX', :REQUIRED, "Table names prefix [\"#{prefix || DEFAULT_PREFIX}\"]") { |s| self.prefix = s }
      o.on('-t TIMEZONE', :REQUIRED, 'Timezone used by SMF2 [auto-detected from PHP]') { |s| self.timezone = s }
    end
  end
end #Options
|
|
|
|
|
|
|
|
# Framework around TSort, used to build a dependency graph over messages
# to find and solve cyclic quotations.
#
# A message depends on its predecessor and, unless quotes are ignored for it,
# on every message it quotes.
class MessageDependencyGraph
  include TSort

  def initialize
    @nodes = {}
  end

  # @return [Node, nil] the node registered under key
  def [](key)
    @nodes[key]
  end

  # Registers a message, replacing any node with the same id.
  #
  # @param id message id
  # @param prev id of the preceding message, if any
  # @param quoted [Array] ids of the messages quoted by this one
  # @return [Node] the new node
  def add_message(id, prev = nil, quoted = [])
    @nodes[id] = Node.new(self, id, prev, quoted)
  end

  # TSort hook: yields every node.
  def tsort_each_node(&block)
    @nodes.each_value(&block)
  end

  # TSort hook: yields the nodes the given node depends on.
  def tsort_each_child(node, &block)
    node.dependencies.each(&block)
  end

  # @return [Array<Array<Node>>] strongly connected components with more
  #   than one node, i.e. groups of messages that depend on each other
  def cycles
    strongly_connected_components.select { |c| c.length > 1 }.to_a
  end

  # A message in the graph. Related nodes are looked up through the graph by
  # id, so they may be registered after this node was created.
  class Node
    attr_reader :id

    def initialize(graph, id, prev = nil, quoted = [])
      @graph = graph
      @id = id
      @prev = prev
      @quoted = quoted
    end

    # @return [Node, nil] node of the preceding message, if registered
    def prev
      @graph[@prev]
    end

    # @return [Array<Node>] nodes of the quoted messages that are registered
    def quoted
      @quoted.map { |id| @graph[id] }.compact
    end

    def ignore_quotes?
      !!@ignore_quotes
    end

    # Changing the flag drops the memoized dependency list.
    def ignore_quotes=(value)
      @ignore_quotes = !!value
      @dependencies = nil
    end

    # @return [Array<Node>] distinct nodes this message depends on (memoized)
    def dependencies
      @dependencies ||= Set.new.tap do |deps|
        deps.merge(quoted) unless ignore_quotes?
        previous = prev
        deps << previous if previous
      end.to_a
    end

    # Nodes hash by id so that they can be used as Hash / Set keys.
    def hash
      @id.hash
    end

    # Two nodes are eql? when their ids are; comparing against a bare id
    # keeps working as before.
    def eql?(other)
      other.is_a?(Node) ? @id.eql?(other.id) : @id.eql?(other)
    end

    def inspect
      quoted_ids = @quoted.map { |qid| safe_id(qid) }.join(', ')
      "#<#{self.class.name}: id=#{id.inspect}, prev=#{safe_id(@prev)}, quoted=[#{quoted_ids}]>"
    end

    private

    # Id of a registered node, or the raw id in parentheses when the graph
    # has no node for it.
    def safe_id(id)
      node = @graph[id]
      node ? node.id.inspect : "(#{id})"
    end
  end #Node

end #MessageDependencyGraph
|
|
|
|
|
2017-04-02 16:23:03 -04:00
|
|
|
# Creates permalinks for the URLs generated by the prettyurl plugin.
#
# Board slugs are read from the serialized 'pretty_board_urls' setting and
# topic slugs from {prefix}pretty_topic_urls; each imported topic with a slug
# gets the permalink "<prefix>/<board slug>/<topic slug>".
#
# This step is best effort: any error (for example when the plugin tables do
# not exist) is reported and the step is skipped instead of failing the
# import.
#
# @param prefix [String] URL prefix of the old forum, e.g. '/forum'
def make_prettyurl_permalinks(prefix)
  puts 'creating permalinks for prettyurl plugin'

  serialized = query(<<-SQL, as: :single)
    SELECT value FROM {prefix}settings
    WHERE variable='pretty_board_urls';
  SQL

  # Content between the outer braces of the serialized array.
  entries = serialized.to_s[/\{(.*)\}/, 1]
  if entries.nil?
    puts 'no prettyurl board slugs found, skipping permalinks'
    return
  end

  board_slugs = {}
  entries.scan(/i:(\d+);s:\d+:\"(.*?)\";/) do |board_id, slug|
    board_slugs[board_id.to_i] = slug
  end

  topic_urls = query(<<-SQL, as: :array)
    SELECT t.id_first_msg, t.id_board, u.pretty_url
    FROM {prefix}topics t
    LEFT JOIN {prefix}pretty_topic_urls u ON u.id_topic = t.id_topic;
  SQL

  topic_urls.each do |url|
    # Topics without a slug would all share the bare board URL.
    next if url[:pretty_url].blank?

    # Topics that were not imported have no lookup entry.
    topic = topic_lookup_from_imported_post_id(url[:id_first_msg])
    next unless topic

    Permalink.create(url: "#{prefix}/#{board_slugs[url[:id_board]]}/#{url[:pretty_url]}", topic_id: topic[:topic_id])
  end
rescue StandardError => err
  puts "Could not create prettyurl permalinks: #{err.message}"
end
|
|
|
|
|
2014-07-16 13:59:30 -04:00
|
|
|
end
|
|
|
|
|
|
|
|
ImportScripts::Smf2.run
|