Merge pull request #2607 from Elberet/smf2-import

Improved SMF2 importer, now imports post attachments
This commit is contained in:
Sam 2014-07-31 10:49:38 +10:00
commit 9950c5bcd6
1 changed file with 113 additions and 20 deletions

View File

@ -54,16 +54,19 @@ class ImportScripts::Smf2 < ImportScripts::Base
options.password = HighLine.new.ask('') {|q| q.echo = false }
end
@db = Mysql2::Client.new(host: options.host, username: options.username,
password: options.password, database: options.database)
@default_db_connection = create_db_connection
end
# Runs the whole import: groups, users, categories, posts, then the
# post-processing pass, in that order.
#
# While the steps run, SiteSetting.authorized_extensions is set to "*"; the
# value it had before is written back afterwards, even when a step raises.
#
# Returns whatever postprocess_posts returns.
def execute
  # Read the current value *outside* the protected region: if this read fails
  # there is nothing to restore, and the ensure clause must not write nil back.
  previous_extensions = SiteSetting.authorized_extensions

  begin
    SiteSetting.authorized_extensions = "*"

    import_groups
    import_users
    import_categories
    import_posts
    postprocess_posts
  ensure
    SiteSetting.authorized_extensions = previous_extensions
  end
end
def import_groups
@ -227,12 +230,16 @@ class ImportScripts::Smf2 < ImportScripts::Base
print "\r#{spinner.next}"
end
db2 = create_db_connection
create_posts(query(<<-SQL), total: total) do |message|
SELECT m.id_msg, m.id_topic, m.id_member, m.poster_time, m.body, o.ignore_quotes,
m.subject, t.id_board, t.id_first_msg
m.subject, t.id_board, t.id_first_msg, COUNT(a.id_attach) AS attachment_count
FROM {prefix}messages AS m
LEFT JOIN {prefix}import_message_order AS o ON o.message_id = m.id_msg
LEFT JOIN {prefix}topics AS t ON t.id_topic = m.id_topic
LEFT JOIN {prefix}attachments AS a ON a.id_msg = m.id_msg AND a.attachment_type = 0
GROUP BY m.id_msg
ORDER BY o.message_order ASC
SQL
skip = false
@ -240,7 +247,6 @@ class ImportScripts::Smf2 < ImportScripts::Base
post = {
id: message[:id_msg],
user_id: user_id_from_imported_user_id(message[:id_member]) || -1,
raw: convert_message_body(message[:body], ignore_quotes: ignore_quotes),
created_at: Time.zone.at(message[:poster_time]),
post_create_action: ignore_quotes && proc do |post|
post.custom_fields['import_rebake'] = 't'
@ -259,10 +265,29 @@ class ImportScripts::Smf2 < ImportScripts::Base
skip = true
end
end
skip ? nil : post
next nil if skip
attachments = message[:attachment_count] == 0 ? [] : query(<<-SQL, connection: db2, as: :array)
SELECT id_attach, file_hash, filename FROM {prefix}attachments
WHERE attachment_type = 0 AND id_msg = #{message[:id_msg]}
ORDER BY id_attach ASC
SQL
attachments.map! {|a| import_attachment(post, a) rescue (puts $! ; nil) }
post[:raw] = convert_message_body(message[:body], attachments, ignore_quotes: ignore_quotes)
next post
end
end
# Creates an upload for one SMF attachment row on behalf of the post's user.
#
# post       - post hash under construction; :id and :user_id are read.
# attachment - attachment row; :id_attach, :file_hash and :filename are read.
#
# Returns the upload once it reports persisted?.
# Raises RuntimeError ("Attachment for post <id> failed: ...") when no file is
# found for the attachment, when the upload is not persisted (its validation
# messages are included), or when a SystemCallError is raised along the way.
def import_attachment(post, attachment)
  failure = ->(reason) { "Attachment for post #{post[:id]} failed: #{reason}" }

  source_file = find_smf_attachment_path(attachment[:id_attach], attachment[:file_hash], attachment[:filename])
  raise failure.call(attachment[:filename]) unless source_file.present?

  created = create_upload(post[:user_id], source_file, attachment[:filename])
  return created if created.persisted?

  raise failure.call(created.errors.full_messages.join(', '))
rescue SystemCallError => e
  raise failure.call(e.message)
end
def postprocess_posts
puts '', 'rebaking posts'
@ -284,20 +309,38 @@ class ImportScripts::Smf2 < ImportScripts::Base
private
def query(sql, **opts, &block)
return __query(sql).to_a if opts[:as] == :array
return __query(sql, as: :array).first[0] if opts[:as] == :single
return __query(sql, stream: true).each(&block) if block_given?
return __query(sql, stream: true)
# Opens a new Mysql2 client using the host, username, password and database
# taken from the parsed options. Every call returns a separate client.
def create_db_connection
  credentials = {
    host: options.host,
    username: options.username,
    password: options.password,
    database: options.database
  }
  Mysql2::Client.new(**credentials)
end
def __query(sql, **opts)
@db.query(sql.gsub('{prefix}', options.prefix),
# Runs +sql+ through __query on opts[:connection], or on
# @default_db_connection when no connection is given.
#
# opts[:as] selects the result shape:
#   :array  - all rows, materialised with to_a
#   :single - the first column of the first row (rows fetched as arrays)
#   absent  - a streamed result; when a block is given each row is yielded
#             to it and the value of #each is returned, otherwise the
#             streamed result itself is returned
def query(sql, **opts, &block)
  connection = opts[:connection] || @default_db_connection

  case opts[:as]
  when :array
    __query(connection, sql).to_a
  when :single
    __query(connection, sql, as: :array).first[0]
  else
    streamed = __query(connection, sql, stream: true)
    block_given? ? streamed.each(&block) : streamed
  end
end
# Low-level query helper: substitutes every '{prefix}' placeholder in +sql+
# with options.prefix and runs the statement on +db+.
#
# The query options default to symbolize_keys: true and cache_rows: false;
# anything passed in +opts+ is merged on top and can override them.
# Returns whatever db.query returns.
def __query(db, sql, **opts)
  statement = sql.gsub('{prefix}', options.prefix)
  settings = { symbolize_keys: true, cache_rows: false }.merge(opts)
  db.query(statement, settings)
end
# Pairs of [accented character, ASCII replacement], built by zipping the two
# strings below character by character. find_smf_attachment_path applies the
# pairs in order when it rebuilds the cleaned attachment filename.
#
# The table, each pair and each string are frozen so this shared lookup data
# cannot be modified by accident.
TRTR_TABLE = begin
  accented = "ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ"
  plain = "SZszYAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy"
  accented.chars.zip(plain.chars).each { |pair| pair.each(&:freeze).freeze }.freeze
end
# Finds the file on disk that belongs to an SMF attachment.
#
# Three candidate file names are tried, in this order, inside
# <options.smfroot>/attachments:
#   1. +filename+ as given,
#   2. "<attachment_id>_<file_hash>",
#   3. the legacy name: "<attachment_id>_" followed by the cleaned filename
#      with dots turned into underscores and the MD5 hex digest of the
#      cleaned filename appended.
#
# Returns the full path of the first candidate that exists, or nil when none
# of them does.
def find_smf_attachment_path(attachment_id, file_hash, filename)
  # Cleaned filename: transliterate via TRTR_TABLE, replace whitespace with
  # underscores, then drop everything except word characters, dots and dashes.
  cleaned_name = filename.dup
  TRTR_TABLE.each { |from, to| cleaned_name.gsub!(from, to) }
  cleaned_name.gsub!(/\s/, '_')
  cleaned_name.gsub!(/[^\w.\-]/, '')
  legacy_name = "#{attachment_id}_#{cleaned_name.tr('.', '_')}#{Digest::MD5.hexdigest(cleaned_name)}"

  # File.exist? rather than File.exists?: the latter was deprecated and no
  # longer exists in Ruby 3.2+.
  [filename, "#{attachment_id}_#{file_hash}", legacy_name]
    .map { |name| File.join(options.smfroot, 'attachments', name) }
    .find { |file| File.exist?(file) }
end
@ -319,10 +362,57 @@ class ImportScripts::Smf2 < ImportScripts::Base
s.lines.each {|l| r << '[li]' << l.strip.sub(/^\[x\]\s*/, '') << '[/li]' }
r << "[/ul]\n"
end
# TODO: attachments
if attachments.present?
use_count = Hash.new(0)
AttachmentPatterns.each do |p|
pattern, emitter = *p
body.gsub!(pattern) do |s|
next s unless (num = $~[:num].to_i - 1) >= 0
next s unless (upload = attachments[num]).present?
use_count[num] += 1
instance_exec(upload, &emitter)
end
end
if use_count.keys.length < attachments.select(&:present?).length
body << "\n\n---"
attachments.each_with_index do |upload, num|
if upload.present? and use_count[num] == 0
body << ( "\n\n" + get_upload_markdown(upload) )
end
end
end
end
return opts[:ignore_quotes] ? body : convert_quotes(body)
end
# Returns the JavaScript context used by this importer. The context is built
# on the first call and memoized in @ctx, so later calls return the same
# object.
def v8
@ctx ||= begin
# Start from a new PrettyText context and let PrettyText decorate it.
ctx = PrettyText.create_new_context
PrettyText.decorate_context(ctx)
# provides toHumanSize but restores I18n.t which we need to fix again
ctx.load(Rails.root + "app/assets/javascripts/locales/i18n.js")
# Point I18n.t inside the context at a Ruby proc that delegates to
# PrettyText::Helpers#t; the proc's first argument is ignored.
helper = PrettyText::Helpers.new
ctx['I18n']['t'] = proc {|_,key,opts| helper.t(key, opts) }
# from i18n_helpers.js -- can't load it directly because Ember is missing
# The script wraps I18n.toHumanSize so that options.format is always set from
# I18n.t("number.human.storage_units.format") before the original
# implementation is called.
ctx.eval(<<-'end')
var oldI18ntoHumanSize = I18n.toHumanSize;
I18n.toHumanSize = function(number, options) {
options = options || {};
options.format = I18n.t("number.human.storage_units.format");
return oldI18ntoHumanSize.apply(this, [number, options]);
};
end
# The begin block evaluates to the configured context, which is what gets
# memoized.
ctx
end
end
# Renders the markdown for +upload+ by calling the JavaScript function
# Discourse.Utilities.getUploadMarkdown inside the v8 context.
#
# The function handle is looked up once and kept in @func for later calls.
# Returns the function's result converted with to_s.
def get_upload_markdown(upload)
  @func = v8.eval("Discourse.Utilities.getUploadMarkdown") unless @func
  @func.call(upload).to_s
end
def convert_quotes(body)
body.to_s.gsub(QuotePattern) do |s|
inner = $~[:inner].strip
@ -398,6 +488,11 @@ class ImportScripts::Smf2 < ImportScripts::Base
# Patterns produced by build_nested_tag_regex (defined outside this excerpt)
# for the 'quote', 'color' and 'list' tag names. convert_quotes scans message
# bodies with QuotePattern and reads its named `inner` capture.
QuotePattern = build_nested_tag_regex('quote')
ColorPattern = build_nested_tag_regex('color')
ListPattern = build_nested_tag_regex('list')
# Ordered [pattern, emitter] pairs for SMF attachment tags: [attach=N],
# [attachimg=N], [attachurl=N] and [attachmini=N]. The named capture `num` is
# the 1-based attachment number as written in the tag.
#
# Each emitter receives the upload and returns its replacement text. Emitters
# call get_upload_markdown, so they have to be run via instance_exec on an
# object that provides that method (convert_message_body does this).
#
# Order matters: the first pattern handles a tag that sits alone on its line
# and pads the markdown with newlines; the second handles any remaining tag.
# Frozen so the shared table cannot be modified by accident.
AttachmentPatterns = [
  [/^\[attach(?:|img|url|mini)=(?<num>\d+)\]$/, ->(u) { "\n#{get_upload_markdown(u)}\n" }],
  [/\[attach(?:|img|url|mini)=(?<num>\d+)\]/, ->(u) { get_upload_markdown(u) }]
].each(&:freeze).freeze
# Provides command line options and parses the SMF settings file.
class Options
@ -405,12 +500,6 @@ class ImportScripts::Smf2 < ImportScripts::Base
class Error < StandardError ; end
class SettingsError < Error ; end
# Seeds the option defaults: host 'localhost', the login name reported by
# Etc.getlogin as username, and 'smf_' as the table prefix.
# NOTE(review): parse! assigns these same three defaults with ||= after it has
# read the SMF settings -- confirm whether both places are meant to coexist.
def initialize
self.host = 'localhost'
self.username = Etc.getlogin
self.prefix = 'smf_'
end
def parse!(args = ARGV)
raise Error, 'not enough arguments' if ARGV.empty?
begin
@ -421,6 +510,10 @@ class ImportScripts::Smf2 < ImportScripts::Base
raise Error, 'too many arguments' if args.length > 1
self.smfroot = args.first
read_smf_settings if self.smfroot
self.host ||= 'localhost'
self.username ||= Etc.getlogin
self.prefix ||= 'smf_'
end
def usage