diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb
index 093875493cd..8a3d30225eb 100644
--- a/script/import_scripts/base.rb
+++ b/script/import_scripts/base.rb
@@ -7,13 +7,13 @@ if ARGV.include?('bbcode-to-md')
# git clone https://github.com/nlalonde/ruby-bbcode-to-md.git
# cd ruby-bbcode-to-md
# gem build ruby-bbcode-to-md.gemspec
- # gem install ruby-bbcode-to-md-0.0.13.gem
+ # gem install ruby-bbcode-to-md-*.gem
require 'ruby-bbcode-to-md'
end
require_relative '../../config/environment'
-require_dependency 'url_helper'
-require_dependency 'file_helper'
+require_relative 'base/lookup_container'
+require_relative 'base/uploader'
module ImportScripts; end
@@ -24,46 +24,13 @@ class ImportScripts::Base
def initialize
preload_i18n
- @bbcode_to_md = true if ARGV.include?('bbcode-to-md')
- @existing_groups = {}
- @failed_groups = []
- @existing_users = {}
- @failed_users = []
- @categories_lookup = {}
- @existing_posts = {}
- @topic_lookup = {}
- @site_settings_during_import = nil
+ @lookup = ImportScripts::LookupContainer.new
+ @uploader = ImportScripts::Uploader.new
+
+ @bbcode_to_md = true if use_bbcode_to_md?
+ @site_settings_during_import = {}
@old_site_settings = {}
- @start_time = Time.now
-
- puts "loading existing groups..."
- GroupCustomField.where(name: 'import_id').pluck(:group_id, :value).each do |group_id, import_id|
- @existing_groups[import_id] = group_id
- end
-
- puts "loading existing users..."
- UserCustomField.where(name: 'import_id').pluck(:user_id, :value).each do |user_id, import_id|
- @existing_users[import_id] = user_id
- end
-
- puts "loading existing categories..."
- CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value).each do |category_id, import_id|
- @categories_lookup[import_id] = category_id
- end
-
- puts "loading existing posts..."
- PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id|
- @existing_posts[import_id] = post_id
- end
-
- puts "loading existing topics..."
- Post.joins(:topic).pluck("posts.id, posts.topic_id, posts.post_number, topics.slug").each do |p|
- @topic_lookup[p[0]] = {
- topic_id: p[1],
- post_number: p[2],
- url: Post.url(p[3], p[1], p[2]),
- }
- end
+ @start_times = {import: Time.now}
end
def preload_i18n
@@ -87,15 +54,15 @@ class ImportScripts::Base
update_topic_count_replies
reset_topic_counters
- elapsed = Time.now - @start_time
- puts '', "Done (#{elapsed.to_s} seconds)"
+ elapsed = Time.now - @start_times[:import]
+ puts '', '', 'Done (%02dh %02dmin %02dsec)' % [elapsed/3600, elapsed/60%60, elapsed%60]
ensure
reset_site_settings
end
- def change_site_settings
- @site_settings_during_import = {
+ def get_site_settings_for_import
+ {
email_domains_blacklist: '',
min_topic_title_length: 1,
min_post_length: 1,
@@ -106,6 +73,10 @@ class ImportScripts::Base
disable_emails: true,
authorized_extensions: '*'
}
+ end
+
+ def change_site_settings
+ @site_settings_during_import = get_site_settings_for_import
@site_settings_during_import.each do |key, value|
@old_site_settings[key] = SiteSetting.send(key)
@@ -124,44 +95,42 @@ class ImportScripts::Base
RateLimiter.enable
end
+ def use_bbcode_to_md?
+ ARGV.include?("bbcode-to-md")
+ end
+
# Implementation will do most of its work in its execute method.
# It will need to call create_users, create_categories, and create_posts.
def execute
raise NotImplementedError
end
- # Get the Discourse Post id based on the id of the source record
def post_id_from_imported_post_id(import_id)
- @existing_posts[import_id] || @existing_posts[import_id.to_s]
+ @lookup.post_id_from_imported_post_id(import_id)
end
- # Get the Discourse topic info (a hash) based on the id of the source record
def topic_lookup_from_imported_post_id(import_id)
- post_id = post_id_from_imported_post_id(import_id)
- post_id ? @topic_lookup[post_id] : nil
+ @lookup.topic_lookup_from_imported_post_id(import_id)
end
- # Get the Discourse Group id based on the id of the source group
def group_id_from_imported_group_id(import_id)
- @existing_groups[import_id] || @existing_groups[import_id.to_s] || find_group_by_import_id(import_id).try(:id)
+ @lookup.group_id_from_imported_group_id(import_id)
end
def find_group_by_import_id(import_id)
- GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group)
+ @lookup.find_group_by_import_id(import_id)
end
- # Get the Discourse User id based on the id of the source user
def user_id_from_imported_user_id(import_id)
- @existing_users[import_id] || @existing_users[import_id.to_s] || find_user_by_import_id(import_id).try(:id)
+ @lookup.user_id_from_imported_user_id(import_id)
end
def find_user_by_import_id(import_id)
- UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user)
+ @lookup.find_user_by_import_id(import_id)
end
- # Get the Discourse Category id based on the id of the source category
def category_id_from_imported_category_id(import_id)
- @categories_lookup[import_id] || @categories_lookup[import_id.to_s]
+ @lookup.category_id_from_imported_category_id(import_id)
end
def create_admin(opts={})
@@ -183,31 +152,32 @@ class ImportScripts::Base
# group in the original datasource. The given id will not be used
# to create the Discourse group record.
def create_groups(results, opts={})
- groups_created = 0
- groups_skipped = 0
+ created = 0
+ skipped = 0
+ failed = 0
total = opts[:total] || results.size
results.each do |result|
g = yield(result)
- if group_id_from_imported_group_id(g[:id])
- groups_skipped += 1
+ if @lookup.group_id_from_imported_group_id(g[:id])
+ skipped += 1
else
new_group = create_group(g, g[:id])
if new_group.valid?
- @existing_groups[g[:id].to_s] = new_group.id
- groups_created += 1
+ @lookup.add_group(g[:id].to_s, new_group)
+ created += 1
else
- @failed_groups << g
+ failed += 1
puts "Failed to create group id #{g[:id]} #{new_group.name}: #{new_group.errors.full_messages}"
end
end
- print_status groups_created + groups_skipped + @failed_groups.length + (opts[:offset] || 0), total
+ print_status created + skipped + failed + (opts[:offset] || 0), total
end
- return [groups_created, groups_skipped]
+ [created, skipped]
end
def create_group(opts, import_id)
@@ -231,8 +201,9 @@ class ImportScripts::Base
# user in the original datasource. The given id will not be used to
# create the Discourse user record.
def create_users(results, opts={})
- users_created = 0
- users_skipped = 0
+ created = 0
+ skipped = 0
+ failed = 0
total = opts[:total] || results.size
results.each do |result|
@@ -240,34 +211,34 @@ class ImportScripts::Base
# block returns nil to skip a user
if u.nil?
- users_skipped += 1
+ skipped += 1
else
import_id = u[:id]
- if user_id_from_imported_user_id(import_id)
- users_skipped += 1
+ if @lookup.user_id_from_imported_user_id(import_id)
+ skipped += 1
elsif u[:email].present?
new_user = create_user(u, import_id)
if new_user.valid? && new_user.user_profile.valid?
- @existing_users[import_id.to_s] = new_user.id
- users_created += 1
+ @lookup.add_user(import_id.to_s, new_user)
+ created += 1
else
- @failed_users << u
+ failed += 1
puts "Failed to create user id: #{import_id}, username: #{new_user.username}, email: #{new_user.email}"
puts "user errors: #{new_user.errors.full_messages}"
- puts "user_profile errors: #{new_user.user_profiler.errors.full_messages}"
+ puts "user_profile errors: #{new_user.user_profile.errors.full_messages}"
end
else
- @failed_users << u
+ failed += 1
puts "Skipping user id #{import_id} because email is blank"
end
end
- print_status users_created + users_skipped + @failed_users.length + (opts[:offset] || 0), total
+ print_status created + skipped + failed + (opts[:offset] || 0), total
end
- return [users_created, users_skipped]
+ [created, skipped]
end
def create_user(opts, import_id)
@@ -334,29 +305,39 @@ class ImportScripts::Base
# create the Discourse category record.
# Optional attributes are position, description, and parent_category_id.
def create_categories(results)
+ created = 0
+ skipped = 0
+ total = results.size
+
results.each do |c|
params = yield(c)
# block returns nil to skip
- next if params.nil? || category_id_from_imported_category_id(params[:id])
+ if params.nil? || @lookup.category_id_from_imported_category_id(params[:id])
+ skipped += 1
+ else
+ # Basic massaging on the category name
+ params[:name] = "Blank" if params[:name].blank?
+ params[:name].strip!
+ params[:name] = params[:name][0..49]
- # Basic massaging on the category name
- params[:name] = "Blank" if params[:name].blank?
- params[:name].strip!
- params[:name] = params[:name][0..49]
+ # make sure categories don't go more than 2 levels deep
+ if params[:parent_category_id]
+ top = Category.find_by_id(params[:parent_category_id])
+ top = top.parent_category while top && !top.parent_category.nil?
+ params[:parent_category_id] = top.id if top
+ end
- puts "\t#{params[:name]}"
+ new_category = create_category(params, params[:id])
+ @lookup.add_category(params[:id], new_category)
- # make sure categories don't go more than 2 levels deep
- if params[:parent_category_id]
- top = Category.find_by_id(params[:parent_category_id])
- top = top.parent_category while top && !top.parent_category.nil?
- params[:parent_category_id] = top.id if top
+ created += 1
end
- new_category = create_category(params, params[:id])
- @categories_lookup[params[:id]] = new_category.id
+ print_status created + skipped, total
end
+
+ [created, skipped]
end
def create_category(opts, import_id)
@@ -396,6 +377,7 @@ class ImportScripts::Base
skipped = 0
created = 0
total = opts[:total] || results.size
+ start_time = get_start_time("posts-#{total}") # the post count should be unique enough to differentiate between posts and PMs
results.each do |r|
params = yield(r)
@@ -406,18 +388,14 @@ class ImportScripts::Base
else
import_id = params.delete(:id).to_s
- if post_id_from_imported_post_id(import_id)
+ if @lookup.post_id_from_imported_post_id(import_id)
skipped += 1 # already imported this post
else
begin
new_post = create_post(params, import_id)
if new_post.is_a?(Post)
- @existing_posts[import_id] = new_post.id
- @topic_lookup[new_post.id] = {
- post_number: new_post.post_number,
- topic_id: new_post.topic_id,
- url: new_post.url,
- }
+ @lookup.add_post(import_id, new_post)
+ @lookup.add_topic(new_post)
created_post(new_post)
@@ -439,10 +417,10 @@ class ImportScripts::Base
end
end
- print_status skipped + created + (opts[:offset] || 0), total
+ print_status(created + skipped + (opts[:offset] || 0), total, start_time)
end
- return [created, skipped]
+ [created, skipped]
end
def create_post(opts, import_id)
@@ -463,19 +441,8 @@ class ImportScripts::Base
post ? post : post_creator.errors.full_messages
end
- # Creates an upload.
- # Expects path to be the full path and filename of the source file.
def create_upload(user_id, path, source_filename)
- tmp = Tempfile.new('discourse-upload')
- src = File.open(path)
- FileUtils.copy_stream(src, tmp)
- src.close
- tmp.rewind
-
- Upload.create_for(user_id, tmp, source_filename, tmp.size)
- ensure
- tmp.close rescue nil
- tmp.unlink rescue nil
+ @uploader.create_upload(user_id, path, source_filename)
end
# Iterate through a list of bookmark records to be imported.
@@ -484,8 +451,8 @@ class ImportScripts::Base
# Required fields are :user_id and :post_id, where both ids are
# the values in the original datasource.
def create_bookmarks(results, opts={})
- bookmarks_created = 0
- bookmarks_skipped = 0
+ created = 0
+ skipped = 0
total = opts[:total] || results.size
user = User.new
@@ -495,23 +462,29 @@ class ImportScripts::Base
params = yield(result)
# only the IDs are needed, so this should be enough
- user.id = user_id_from_imported_user_id(params[:user_id])
- post.id = post_id_from_imported_post_id(params[:post_id])
-
- if user.id.nil? || post.id.nil?
- bookmarks_skipped += 1
- puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}"
+ if params.nil?
+ skipped += 1
else
- begin
- PostAction.act(user, post, PostActionType.types[:bookmark])
- bookmarks_created += 1
- rescue PostAction::AlreadyActed
- bookmarks_skipped += 1
- end
+ user.id = @lookup.user_id_from_imported_user_id(params[:user_id])
+ post.id = @lookup.post_id_from_imported_post_id(params[:post_id])
- print_status bookmarks_created + bookmarks_skipped + (opts[:offset] || 0), total
+ if user.id.nil? || post.id.nil?
+ skipped += 1
+ puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}"
+ else
+ begin
+ PostAction.act(user, post, PostActionType.types[:bookmark])
+ created += 1
+ rescue PostAction::AlreadyActed
+ skipped += 1
+ end
+ end
end
+
+ print_status created + skipped + (opts[:offset] || 0), total
end
+
+ [created, skipped]
end
def close_inactive_topics(opts={})
@@ -633,23 +606,26 @@ class ImportScripts::Base
end
def html_for_upload(upload, display_filename)
- if FileHelper.is_image?(upload.url)
- embedded_image_html(upload)
- else
- attachment_html(upload, display_filename)
- end
+ @uploader.html_for_upload(upload, display_filename)
end
def embedded_image_html(upload)
- %Q[
]
+ @uploader.embedded_image_html(upload)
end
def attachment_html(upload, display_filename)
- "#{display_filename} (#{number_to_human_size(upload.filesize)})"
+ @uploader.attachment_html(upload, display_filename)
end
- def print_status(current, max)
- print "\r%9d / %d (%5.1f%%) " % [current, max, ((current.to_f / max.to_f) * 100).round(1)]
+ def print_status(current, max, start_time = nil)
+ if start_time.present?
+ elapsed_seconds = Time.now - start_time
+ elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
+ else
+ elements_per_minute = ''
+ end
+
+ print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
end
def print_spinner
@@ -658,6 +634,10 @@ class ImportScripts::Base
print "\b#{@spinner_chars[0]}"
end
+ def get_start_time(key)
+ @start_times.fetch(key) {|k| @start_times[k] = Time.now}
+ end
+
def batches(batch_size)
offset = 0
loop do
diff --git a/script/import_scripts/base/lookup_container.rb b/script/import_scripts/base/lookup_container.rb
new file mode 100644
index 00000000000..0d8070932ae
--- /dev/null
+++ b/script/import_scripts/base/lookup_container.rb
@@ -0,0 +1,99 @@
+module ImportScripts
+  # In-memory lookup tables that map ids from the import source to the
+  # ids of the Discourse records that carry them as import_id.
+  class LookupContainer
+    # Preloads the import_id custom fields of groups, users, categories
+    # and posts, plus the topic info of every post joined to a topic.
+    def initialize
+      @groups = load_import_ids('groups', GroupCustomField, :group_id)
+      @users = load_import_ids('users', UserCustomField, :user_id)
+      @categories = load_import_ids('categories', CategoryCustomField, :category_id)
+      @posts = load_import_ids('posts', PostCustomField, :post_id)
+
+      puts 'loading existing topics...'
+      @topics = {}
+      Post.joins(:topic).pluck('posts.id, posts.topic_id, posts.post_number, topics.slug').each do |p|
+        @topics[p[0]] = {
+          topic_id: p[1],
+          post_number: p[2],
+          url: Post.url(p[3], p[1], p[2])
+        }
+      end
+    end
+
+    # Get the Discourse Post id based on the id of the source record
+    def post_id_from_imported_post_id(import_id)
+      @posts[import_id] || @posts[import_id.to_s]
+    end
+
+    # Get the Discourse topic info (a hash) based on the id of the source record
+    def topic_lookup_from_imported_post_id(import_id)
+      post_id = post_id_from_imported_post_id(import_id)
+      post_id ? @topics[post_id] : nil
+    end
+
+    # Get the Discourse Group id based on the id of the source group
+    def group_id_from_imported_group_id(import_id)
+      @groups[import_id] || @groups[import_id.to_s] || find_group_by_import_id(import_id).try(:id)
+    end
+
+    # Get the Discourse Group based on the id of the source group
+    def find_group_by_import_id(import_id)
+      GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group)
+    end
+
+    # Get the Discourse User id based on the id of the source user
+    def user_id_from_imported_user_id(import_id)
+      @users[import_id] || @users[import_id.to_s] || find_user_by_import_id(import_id).try(:id)
+    end
+
+    # Get the Discourse User based on the id of the source user
+    def find_user_by_import_id(import_id)
+      UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user)
+    end
+
+    # Get the Discourse Category id based on the id of the source category
+    def category_id_from_imported_category_id(import_id)
+      @categories[import_id] || @categories[import_id.to_s]
+    end
+
+    # The add_* methods below store their key as import_id.to_s, the form
+    # the lookups above fall back to, so Integer and String ids both resolve.
+    def add_group(import_id, group)
+      @groups[import_id.to_s] = group.id
+    end
+
+    def add_user(import_id, user)
+      @users[import_id.to_s] = user.id
+    end
+
+    def add_category(import_id, category)
+      @categories[import_id.to_s] = category.id
+    end
+
+    def add_post(import_id, post)
+      @posts[import_id.to_s] = post.id
+    end
+
+    # Records post number, topic id and URL of the given post under its id
+    def add_topic(post)
+      @topics[post.id] = {
+        post_number: post.post_number,
+        topic_id: post.topic_id,
+        url: post.url,
+      }
+    end
+
+    private
+
+    # Builds an { import_id => record id } hash from one custom field model.
+    # @param label [String] plural record name shown in the progress message
+    # @param custom_field_class [Class] model queried for import_id rows
+    # @param id_column [Symbol] column holding the id of the owning record
+    def load_import_ids(label, custom_field_class, id_column)
+      puts "loading existing #{label}..."
+      rows = custom_field_class.where(name: 'import_id').pluck(id_column, :value)
+      rows.each_with_object({}) { |(record_id, import_id), ids| ids[import_id] = record_id }
+    end
+  end
+end
diff --git a/script/import_scripts/base/uploader.rb b/script/import_scripts/base/uploader.rb
new file mode 100644
index 00000000000..62ddac451d7
--- /dev/null
+++ b/script/import_scripts/base/uploader.rb
@@ -0,0 +1,45 @@
+require_dependency 'url_helper'
+require_dependency 'file_helper'
+
+module ImportScripts
+ class Uploader
+ include ActionView::Helpers::NumberHelper
+
+ # Creates an upload from a copy of a file on disk.
+ # Expects path to be the full path and filename of the source file.
+ # @return [Upload, nil] nil when an error occurred (it is logged, not raised)
+ def create_upload(user_id, path, source_filename)
+   tmp = Tempfile.new('discourse-upload')
+   # block form closes the source handle even when the copy raises
+   File.open(path) { |src| FileUtils.copy_stream(src, tmp) }
+   tmp.rewind
+
+   Upload.create_for(user_id, tmp, source_filename, tmp.size)
+ rescue => e
+   Rails.logger.error("Failed to create upload: #{e}")
+   nil
+ ensure
+   # best-effort cleanup; tmp is nil when Tempfile.new itself failed
+   tmp.close rescue nil
+   tmp.unlink rescue nil
+ end
+
+ # Returns embedded_image_html for uploads whose URL FileHelper treats as
+ # an image, and attachment_html (with display_filename) for all others.
+ def html_for_upload(upload, display_filename)
+   return embedded_image_html(upload) if FileHelper.is_image?(upload.url)
+
+   attachment_html(upload, display_filename)
+ end
+
+ def embedded_image_html(upload)
+ image_width = [upload.width, SiteSetting.max_image_width].compact.min
+ image_height = [upload.height, SiteSetting.max_image_height].compact.min
+ %Q[
]
+ end
+
+ def attachment_html(upload, display_filename)
+ "#{display_filename} (#{number_to_human_size(upload.filesize)})"
+ end
+ end
+end