Merge pull request #3800 from riking/udacity_import

Import script infrastructure - batch skipping, usernames
This commit is contained in:
Jeff Atwood 2015-09-21 17:15:56 -07:00
commit 314dca0ac1
20 changed files with 112 additions and 12 deletions

View File

@ -110,6 +110,8 @@ class ImportScripts::MyAskBot < ImportScripts::Base
break if users.ntuples() < 1 break if users.ntuples() < 1
next if all_records_exist? :users, users.map {|u| u["id"].to_i}
create_users(users, total: total_count, offset: offset) do |user| create_users(users, total: total_count, offset: offset) do |user|
{ {
id: user["id"], id: user["id"],
@ -153,6 +155,8 @@ class ImportScripts::MyAskBot < ImportScripts::Base
break if posts.ntuples() < 1 break if posts.ntuples() < 1
next if all_records_exist? :posts, posts.map {|p| p["id"].to_i}
create_posts(posts, total: post_count, offset: offset) do |post| create_posts(posts, total: post_count, offset: offset) do |post|
pid = post["id"] pid = post["id"]
tid = post["thread_id"].to_i tid = post["thread_id"].to_i
@ -206,6 +210,8 @@ class ImportScripts::MyAskBot < ImportScripts::Base
break if posts.ntuples() < 1 break if posts.ntuples() < 1
next if all_records_exist? :posts, posts.map {|p| p["id"].to_i}
create_posts(posts, total: post_count, offset: offset) do |post| create_posts(posts, total: post_count, offset: offset) do |post|
tid = post["thread_id"].to_i tid = post["thread_id"].to_i
next unless thread = @thread_parents[tid] next unless thread = @thread_parents[tid]

View File

@ -194,6 +194,23 @@ class ImportScripts::Base
g.tap(&:save) g.tap(&:save)
end end
# Reports whether every id in +import_ids+ has already been imported, so the
# caller can skip the whole batch.
#
# type       - plural model name as a Symbol (callers pass :users or :posts);
#              it is resolved to "<Model>CustomField", e.g. UserCustomField.
# import_ids - Array of source-system ids (Integers or Strings) for one batch.
#
# Returns true when each distinct id has a matching 'import_id' custom field,
# false otherwise (an empty batch is never considered imported).
def all_records_exist?(type, import_ids)
  return false if import_ids.empty?

  # Compare ids as text rather than casting the stored value to int: a cast
  # is applied to every 'import_id' row and blows up as soon as one importer
  # stores non-numeric ids (e.g. "pm:12") next to numeric ones. This also
  # avoids the Fixnum constant, which no longer exists in current Ruby.
  # NOTE(review): assumes ids are stored in their plain #to_s form — confirm.
  wanted = import_ids.map(&:to_s).uniq

  custom_field_class = "#{type.to_s.classify}CustomField".constantize
  imported = custom_field_class.where(name: 'import_id').where(value: wanted)

  # Count distinct values so duplicated ids (in the batch or in the table)
  # cannot skew the comparison.
  imported.distinct.count(:value) == wanted.length
end
# Iterate through a list of user records to be imported. # Iterate through a list of user records to be imported.
# Takes a collection, and yields to the block for each element. # Takes a collection, and yields to the block for each element.
# Block should return a hash with the attributes for the User model. # Block should return a hash with the attributes for the User model.
@ -258,9 +275,8 @@ class ImportScripts::Base
if opts[:username].blank? || if opts[:username].blank? ||
opts[:username].length < User.username_length.begin || opts[:username].length < User.username_length.begin ||
opts[:username].length > User.username_length.end || opts[:username].length > User.username_length.end ||
opts[:username] =~ /[^A-Za-z0-9_]/ || !User.username_available?(opts[:username]) ||
opts[:username][0] =~ /[^A-Za-z0-9]/ || !UsernameValidator.new(opts[:username]).valid_format?
!User.username_available?(opts[:username])
opts[:username] = UserNameSuggester.suggest(opts[:username] || opts[:name] || opts[:email]) opts[:username] = UserNameSuggester.suggest(opts[:username] || opts[:name] || opts[:email])
end end
opts[:email] = opts[:email].downcase opts[:email] = opts[:email].downcase
@ -289,14 +305,19 @@ class ImportScripts::Base
if opts[:active] && opts[:password].present? if opts[:active] && opts[:password].present?
u.activate u.activate
end end
rescue rescue => e
# try based on email # try based on email
if e.record.errors.messages[:email].present?
existing = User.find_by(email: opts[:email].downcase) existing = User.find_by(email: opts[:email].downcase)
if existing if existing
existing.custom_fields["import_id"] = import_id existing.custom_fields["import_id"] = import_id
existing.save! existing.save!
u = existing u = existing
end end
else
puts "Error on record: #{opts}"
raise e
end
end end
post_create_action.try(:call, u) if u.persisted? post_create_action.try(:call, u) if u.persisted?

View File

@ -85,6 +85,8 @@ class ImportScripts::Bbpress < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| p["id"].to_i}
create_posts(results, total: total_count, offset: offset) do |post| create_posts(results, total: total_count, offset: offset) do |post|
skip = false skip = false
mapped = {} mapped = {}

View File

@ -98,6 +98,8 @@ class ImportScripts::DiscuzX < ImportScripts::Base
break if results.size < 1 break if results.size < 1
# Skip the batch when every user in it was imported on a previous run.
# The batch rows live in `results`; there is no `users` local in this loop.
next if all_records_exist?(:users, results.map { |u| u["id"].to_i })
create_users(results, total: total_count, offset: offset) do |user| create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'], { id: user['id'],
email: user['email'], email: user['email'],
@ -205,6 +207,8 @@ class ImportScripts::DiscuzX < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| p["id"].to_i}
create_posts(results, total: total_count, offset: offset) do |m| create_posts(results, total: total_count, offset: offset) do |m|
skip = false skip = false
mapped = {} mapped = {}
@ -281,6 +285,8 @@ class ImportScripts::DiscuzX < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}
create_posts(results, total: total_count, offset: offset) do |m| create_posts(results, total: total_count, offset: offset) do |m|
skip = false skip = false
mapped = {} mapped = {}

View File

@ -121,6 +121,8 @@ class ImportScripts::Drupal < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| "nid:#{p['nid']}"}
create_posts(results, total: total_count, offset: offset) do |row| create_posts(results, total: total_count, offset: offset) do |row|
{ {
id: "nid:#{row['nid']}", id: "nid:#{row['nid']}",
@ -167,6 +169,8 @@ class ImportScripts::Drupal < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| "cid:#{p['cid']}"}
create_posts(results, total: total_count, offset: offset) do |row| create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
if topic_mapping && topic_id = topic_mapping[:topic_id] if topic_mapping && topic_id = topic_mapping[:topic_id]

View File

@ -56,6 +56,8 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| "nid:#{p['nid']}"}
create_posts(results, total: total_count, offset: offset) do |row| create_posts(results, total: total_count, offset: offset) do |row|
{ {
id: "nid:#{row['nid']}", id: "nid:#{row['nid']}",
@ -100,6 +102,8 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| "cid:#{p['cid']}"}
create_posts(results, total: total_count, offset: offset) do |row| create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
if topic_mapping && topic_id = topic_mapping[:topic_id] if topic_mapping && topic_id = topic_mapping[:topic_id]
@ -151,6 +155,8 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| "cid:#{p['cid']}"}
create_posts(results, total: total_count, offset: offset) do |row| create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
if topic_mapping && topic_id = topic_mapping[:topic_id] if topic_mapping && topic_id = topic_mapping[:topic_id]
@ -201,6 +207,8 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| "cid:#{p['cid']}"}
create_posts(results, total: total_count, offset: offset) do |row| create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}") topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
if topic_mapping && topic_id = topic_mapping[:topic_id] if topic_mapping && topic_id = topic_mapping[:topic_id]

View File

@ -109,6 +109,8 @@ class ImportScripts::Kunena < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| p['id'].to_i}
create_posts(results, total: total_count, offset: offset) do |m| create_posts(results, total: total_count, offset: offset) do |m|
skip = false skip = false
mapped = {} mapped = {}

View File

@ -109,6 +109,8 @@ class ImportScripts::Kunena < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| p['id'].to_i}
create_posts(results, total: total_count, offset: offset) do |m| create_posts(results, total: total_count, offset: offset) do |m|
skip = false skip = false
mapped = {} mapped = {}

View File

@ -101,6 +101,8 @@ class ImportScripts::Lithium < ImportScripts::Base
break if users.size < 1 break if users.size < 1
next if all_records_exist? :users, users.map {|u| u["id"].to_i}
create_users(users, total: user_count, offset: offset) do |user| create_users(users, total: user_count, offset: offset) do |user|
{ {
@ -274,9 +276,10 @@ class ImportScripts::Lithium < ImportScripts::Base
OFFSET #{offset} OFFSET #{offset}
SQL SQL
break if topics.size < 1 break if topics.size < 1
next if all_records_exist? :posts, topics.map {|topic| "#{topic["node_id"]} #{topic["id"]}"}
create_posts(topics, total: topic_count, offset: offset) do |topic| create_posts(topics, total: topic_count, offset: offset) do |topic|
category_id = category_id_from_imported_category_id(topic["node_id"]) category_id = category_id_from_imported_category_id(topic["node_id"])
@ -322,6 +325,8 @@ class ImportScripts::Lithium < ImportScripts::Base
break if posts.size < 1 break if posts.size < 1
next if all_records_exist? :posts, posts.map {|post| "#{post["node_id"]} #{post["root_id"]} #{post["id"]}"}
create_posts(posts, total: post_count, offset: offset) do |post| create_posts(posts, total: post_count, offset: offset) do |post|
raw = post["raw"] raw = post["raw"]
next unless topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}") next unless topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}")
@ -593,6 +598,8 @@ class ImportScripts::Lithium < ImportScripts::Base
break if topics.size < 1 break if topics.size < 1
next if all_records_exist? :posts, topics.map {|topic| "pm_#{topic["note_id"]}"}
create_posts(topics, total: topic_count, offset: offset) do |topic| create_posts(topics, total: topic_count, offset: offset) do |topic|
user_id = user_id_from_imported_user_id(topic["sender_user_id"]) || Discourse::SYSTEM_USER_ID user_id = user_id_from_imported_user_id(topic["sender_user_id"]) || Discourse::SYSTEM_USER_ID

View File

@ -69,6 +69,7 @@ class ImportScripts::Mbox < ImportScripts::Base
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
users = user_keys[offset..offset+BATCH_SIZE-1] users = user_keys[offset..offset+BATCH_SIZE-1]
break if users.nil? break if users.nil?
next if all_records_exist? :users, users
create_users(users, total: total_count, offset: offset) do |email| create_users(users, total: total_count, offset: offset) do |email|
{ {
@ -99,6 +100,8 @@ class ImportScripts::Mbox < ImportScripts::Base
topics = all_topics[offset..offset+BATCH_SIZE-1] topics = all_topics[offset..offset+BATCH_SIZE-1]
break if topics.nil? break if topics.nil?
next if all_records_exist? :posts, topics.map {|t| t['id'].to_i}
create_posts(topics, total: topic_count, offset: offset) do |t| create_posts(topics, total: topic_count, offset: offset) do |t|
raw_email = File.read(t['file']) raw_email = File.read(t['file'])
receiver = Email::Receiver.new(raw_email, skip_sanity_check: true) receiver = Email::Receiver.new(raw_email, skip_sanity_check: true)
@ -136,6 +139,8 @@ class ImportScripts::Mbox < ImportScripts::Base
posts = replies[offset..offset+BATCH_SIZE-1] posts = replies[offset..offset+BATCH_SIZE-1]
break if posts.nil? break if posts.nil?
next if all_records_exist? :posts, posts.map {|p| p['id'].to_i}
create_posts(posts, total: post_count, offset: offset) do |p| create_posts(posts, total: post_count, offset: offset) do |p|
parent_id = p['topic'] parent_id = p['topic']
id = p['id'] id = p['id']

View File

@ -48,6 +48,8 @@ class ImportScripts::MyBB < ImportScripts::Base
break if results.size < 1 break if results.size < 1
# Skip the batch when every user in it was imported on a previous run.
# The batch rows live in `results`; there is no `users` local in this loop.
next if all_records_exist?(:users, results.map { |u| u["id"].to_i })
create_users(results, total: total_count, offset: offset) do |user| create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'], { id: user['id'],
email: user['email'], email: user['email'],
@ -100,6 +102,8 @@ class ImportScripts::MyBB < ImportScripts::Base
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|m| m['id'].to_i}
create_posts(results, total: total_count, offset: offset) do |m| create_posts(results, total: total_count, offset: offset) do |m|
skip = false skip = false
mapped = {} mapped = {}

View File

@ -40,6 +40,8 @@ class ImportScripts::Nabble < ImportScripts::Base
break if users.ntuples() < 1 break if users.ntuples() < 1
next if all_records_exist? :users, users.map {|u| u["user_id"].to_i}
create_users(users, total: total_count, offset: offset) do |user| create_users(users, total: total_count, offset: offset) do |user|
{ {
id: user["user_id"], id: user["user_id"],
@ -80,6 +82,8 @@ class ImportScripts::Nabble < ImportScripts::Base
break if topics.ntuples() < 1 break if topics.ntuples() < 1
next if all_records_exist? :posts, topics.map {|t| t['node_id'].to_i}
create_posts(topics, total: topic_count, offset: offset) do |t| create_posts(topics, total: topic_count, offset: offset) do |t|
raw = body_from(t) raw = body_from(t)
next unless raw next unless raw
@ -122,6 +126,8 @@ class ImportScripts::Nabble < ImportScripts::Base
break if posts.ntuples() < 1 break if posts.ntuples() < 1
next if all_records_exist? :posts, posts.map {|p| p['node_id'].to_i}
create_posts(posts, total: post_count, offset: offset) do |p| create_posts(posts, total: post_count, offset: offset) do |p|
parent_id = p['parent_id'] parent_id = p['parent_id']
id = p['node_id'] id = p['node_id']

View File

@ -56,6 +56,8 @@ module ImportScripts::PhpBB3
rows = @database.fetch_users(offset) rows = @database.fetch_users(offset)
break if rows.size < 1 break if rows.size < 1
next if all_records_exist? :users, importer.map_to_import_ids(rows)
create_users(rows, total: total_count, offset: offset) do |row| create_users(rows, total: total_count, offset: offset) do |row|
importer.map_user(row) importer.map_user(row)
end end

View File

@ -9,6 +9,10 @@ module ImportScripts::PhpBB3
@settings = settings @settings = settings
end end
# Collects the :user_id of each row, in order, so the caller can check
# whether the whole batch has already been imported.
def map_to_import_ids(array)
  array.map do |row|
    row[:user_id]
  end
end
def map_user(row) def map_user(row)
is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER

View File

@ -43,6 +43,8 @@ class ImportScripts::PunBB < ImportScripts::Base
break if results.size < 1 break if results.size < 1
# Skip the batch when every user in it was imported on a previous run.
# The batch rows live in `results`; there is no `users` local in this loop.
next if all_records_exist?(:users, results.map { |u| u["id"].to_i })
create_users(results, total: total_count, offset: offset) do |user| create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'], { id: user['id'],
email: user['email'], email: user['email'],
@ -118,6 +120,7 @@ class ImportScripts::PunBB < ImportScripts::Base
").to_a ").to_a
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|m| m['id'].to_i}
create_posts(results, total: total_count, offset: offset) do |m| create_posts(results, total: total_count, offset: offset) do |m|
skip = false skip = false

View File

@ -77,6 +77,7 @@ class ImportScripts::Sfn < ImportScripts::Base
SQL SQL
break if users.size < 1 break if users.size < 1
next if all_records_exist? :users, users.map {|u| u["id"].to_i}
create_users(users, total: user_count, offset: offset) do |user| create_users(users, total: user_count, offset: offset) do |user|
external_user = @external_users[user["id"]] external_user = @external_users[user["id"]]
@ -231,6 +232,7 @@ class ImportScripts::Sfn < ImportScripts::Base
SQL SQL
break if topics.size < 1 break if topics.size < 1
next if all_records_exist? :posts, topics.map {|t| t['id'].to_i}
create_posts(topics, total: topic_count, offset: offset) do |topic| create_posts(topics, total: topic_count, offset: offset) do |topic|
next unless category_id = CATEGORY_MAPPING[topic["category_id"]] next unless category_id = CATEGORY_MAPPING[topic["category_id"]]
@ -282,6 +284,8 @@ class ImportScripts::Sfn < ImportScripts::Base
break if posts.size < 1 break if posts.size < 1
next if all_records_exist? :posts, posts.map {|p| p['id'].to_i}
create_posts(posts, total: posts_count, offset: offset) do |post| create_posts(posts, total: posts_count, offset: offset) do |post|
next unless parent = topic_lookup_from_imported_post_id(post["topic_id"]) next unless parent = topic_lookup_from_imported_post_id(post["topic_id"])

View File

@ -173,6 +173,8 @@ class ImportScripts::Tnation < ImportScripts::Base
break if users.size < 1 break if users.size < 1
next if all_records_exist? :users, users.map {|u| u["id"].to_i}
user_bios = {} user_bios = {}
user_avatars = {} user_avatars = {}
user_properties = {} user_properties = {}
@ -317,6 +319,7 @@ class ImportScripts::Tnation < ImportScripts::Base
posts = posts.to_a posts = posts.to_a
break if posts.size < 1 break if posts.size < 1
next if all_records_exist? :posts, posts.map {|p| p['id'].to_i}
# load images # load images
forum_images = {} forum_images = {}

View File

@ -42,6 +42,8 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
break if results.size < 1 break if results.size < 1
# Skip the batch when every user in it was imported on a previous run.
# The batch rows live in `results`; there is no `users` local in this loop.
next if all_records_exist?(:users, results.map { |u| u['UserID'].to_i })
create_users(results, total: total_count, offset: offset) do |user| create_users(results, total: total_count, offset: offset) do |user|
next if user['Email'].blank? next if user['Email'].blank?
next if user['Name'].blank? next if user['Name'].blank?
@ -92,6 +94,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
OFFSET #{offset};") OFFSET #{offset};")
break if discussions.size < 1 break if discussions.size < 1
next if all_records_exist? :posts, discussions.map {|t| "discussion#" + t['DiscussionID'].to_s}
create_posts(discussions, total: total_count, offset: offset) do |discussion| create_posts(discussions, total: total_count, offset: offset) do |discussion|
{ {
@ -121,6 +124,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
OFFSET #{offset};") OFFSET #{offset};")
break if comments.size < 1 break if comments.size < 1
next if all_records_exist? :posts, comments.map {|comment| "comment#" + comment['CommentID'].to_s}
create_posts(comments, total: total_count, offset: offset) do |comment| create_posts(comments, total: total_count, offset: offset) do |comment|
next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s) next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)

View File

@ -71,6 +71,8 @@ class ImportScripts::VBulletin < ImportScripts::Base
break if users.size < 1 break if users.size < 1
next if all_records_exist? :users, users.map {|u| u["userid"].to_i}
create_users(users, total: user_count, offset: offset) do |user| create_users(users, total: user_count, offset: offset) do |user|
username = @htmlentities.decode(user["username"]).strip username = @htmlentities.decode(user["username"]).strip
@ -208,6 +210,7 @@ class ImportScripts::VBulletin < ImportScripts::Base
SQL SQL
break if topics.size < 1 break if topics.size < 1
# Skip the batch when every topic in it was imported on a previous run.
# The ids are built from the block parameter; `topic` is not defined here.
next if all_records_exist?(:posts, topics.map { |t| "thread-#{t["threadid"]}" })
create_posts(topics, total: topic_count, offset: offset) do |topic| create_posts(topics, total: topic_count, offset: offset) do |topic|
raw = preprocess_post_raw(topic["raw"]) rescue nil raw = preprocess_post_raw(topic["raw"]) rescue nil
@ -249,6 +252,7 @@ class ImportScripts::VBulletin < ImportScripts::Base
SQL SQL
break if posts.size < 1 break if posts.size < 1
next if all_records_exist? :posts, posts.map {|p| p["postid"] }
create_posts(posts, total: post_count, offset: offset) do |post| create_posts(posts, total: post_count, offset: offset) do |post|
raw = preprocess_post_raw(post["raw"]) rescue nil raw = preprocess_post_raw(post["raw"]) rescue nil

View File

@ -42,6 +42,8 @@ class ImportScripts::XenForo < ImportScripts::Base
break if results.size < 1 break if results.size < 1
# Skip the batch when every user in it was imported on a previous run.
# The batch rows live in `results`; there is no `users` local in this loop.
next if all_records_exist?(:users, results.map { |u| u["id"].to_i })
create_users(results, total: total_count, offset: offset) do |user| create_users(results, total: total_count, offset: offset) do |user|
next if user['username'].blank? next if user['username'].blank?
{ id: user['id'], { id: user['id'],
@ -98,6 +100,7 @@ class ImportScripts::XenForo < ImportScripts::Base
").to_a ").to_a
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map {|p| p['id'] }
create_posts(results, total: total_count, offset: offset) do |m| create_posts(results, total: total_count, offset: offset) do |m|
skip = false skip = false