diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb index ede4a7654d1..4d001e4a8c4 100644 --- a/script/import_scripts/base.rb +++ b/script/import_scripts/base.rb @@ -203,24 +203,23 @@ class ImportScripts::Base def all_records_exist?(type, import_ids) return false if import_ids.empty? - orig_conn = ActiveRecord::Base.connection - conn = orig_conn.raw_connection - - conn.exec('CREATE TEMP TABLE import_ids(val varchar(200) PRIMARY KEY)') + connection = ActiveRecord::Base.connection.raw_connection + connection.exec('CREATE TEMP TABLE import_ids(val text PRIMARY KEY)') import_id_clause = import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",") - conn.exec("INSERT INTO import_ids VALUES #{import_id_clause}") + connection.exec("INSERT INTO import_ids VALUES #{import_id_clause}") - existing = "#{type.to_s.classify}CustomField".constantize.where(name: 'import_id') - existing = existing.joins('JOIN import_ids ON val = value') - - if existing.count == import_ids.length + existing = "#{type.to_s.classify}CustomField".constantize + existing = existing.where(name: 'import_id') + .joins('JOIN import_ids ON val = value') + .count + if existing == import_ids.length puts "Skipping #{import_ids.length} already imported #{type}" return true end ensure - conn.exec('DROP TABLE import_ids') + connection.exec('DROP TABLE import_ids') end def created_user(user) diff --git a/script/import_scripts/base/lookup_container.rb b/script/import_scripts/base/lookup_container.rb index 0d8070932ae..86513775a28 100644 --- a/script/import_scripts/base/lookup_container.rb +++ b/script/import_scripts/base/lookup_container.rb @@ -95,5 +95,14 @@ module ImportScripts url: post.url, } end + + def user_already_imported?(import_id) + @users.has_key?(import_id) || @users.has_key?(import_id.to_s) + end + + def post_already_imported?(import_id) + @posts.has_key?(import_id) || @posts.has_key?(import_id.to_s) + end + end end diff --git a/script/import_scripts/vbulletin.rb b/script/import_scripts/vbulletin.rb index 232ad987d80..f7733d60c9a 100644 --- a/script/import_scripts/vbulletin.rb +++ b/script/import_scripts/vbulletin.rb @@ -73,6 +73,8 @@ EOM def execute + mysql_query("CREATE INDEX firstpostid_index ON #{TABLE_PREFIX}thread (firstpostid)") rescue nil + import_groups import_users create_groups_membership @@ -111,27 +113,35 @@ EOM user_count = mysql_query("SELECT COUNT(userid) count FROM #{TABLE_PREFIX}user").first["count"] + last_user_id = -1 + batches(BATCH_SIZE) do |offset| - users = mysql_query <<-SQL + users = mysql_query(<<-SQL SELECT userid, username, homepage, usertitle, usergroupid, joindate, email FROM #{TABLE_PREFIX}user + WHERE userid > #{last_user_id} ORDER BY userid LIMIT #{BATCH_SIZE} - OFFSET #{offset} SQL + ).to_a - break if users.size < 1 + break if users.empty? - next if all_records_exist? :users, users.map {|u| u["userid"].to_i} + last_user_id = users[-1]["userid"] + before = users.size + users.reject! { |u| @lookup.user_already_imported?(u["userid"].to_i) } create_users(users, total: user_count, offset: offset) do |user| + email = user["email"].presence || fake_email + email = fake_email unless email[EmailValidator.email_regex] + username = @htmlentities.decode(user["username"]).strip { id: user["userid"], name: username, username: username, - email: user["email"].presence || fake_email, + email: email, website: user["homepage"].strip, title: @htmlentities.decode(user["usertitle"]).strip, primary_group_id: group_id_from_imported_group_id(user["usergroupid"].to_i), @@ -275,19 +285,24 @@ EOM topic_count = mysql_query("SELECT COUNT(threadid) count FROM #{TABLE_PREFIX}thread").first["count"] + last_topic_id = -1 + batches(BATCH_SIZE) do |offset| - topics = mysql_query <<-SQL + topics = mysql_query(<<-SQL SELECT t.threadid threadid, t.title title, forumid, open, postuserid, t.dateline dateline, views, t.visible visible, sticky, p.pagetext raw FROM #{TABLE_PREFIX}thread t JOIN #{TABLE_PREFIX}post p ON p.postid = t.firstpostid + WHERE t.threadid > #{last_topic_id} ORDER BY t.threadid LIMIT #{BATCH_SIZE} - OFFSET #{offset} SQL + ).to_a - break if topics.size < 1 - next if all_records_exist? :posts, topics.map {|t| "thread-#{t["threadid"]}" } + break if topics.empty? + + last_topic_id = topics[-1]["threadid"] + topics.reject! { |t| @lookup.post_already_imported?("thread-#{t["threadid"]}") } create_posts(topics, total: topic_count, offset: offset) do |topic| raw = preprocess_post_raw(topic["raw"]) rescue nil @@ -324,27 +339,32 @@ EOM def import_posts puts "", "importing posts..." - # make sure `firstpostid` is indexed - begin - mysql_query("CREATE INDEX firstpostid_index ON #{TABLE_PREFIX}thread (firstpostid)") - rescue Mysql2::Error - puts 'Index already exists' - end + post_count = mysql_query(<<-SQL + SELECT COUNT(postid) count + FROM #{TABLE_PREFIX}post p + JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid + WHERE t.firstpostid <> p.postid + SQL + ).first["count"] - post_count = mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}post WHERE postid NOT IN (SELECT firstpostid FROM #{TABLE_PREFIX}thread)").first["count"] + last_post_id = -1 batches(BATCH_SIZE) do |offset| - posts = mysql_query <<-SQL - SELECT postid, userid, threadid, pagetext raw, dateline, visible, parentid - FROM #{TABLE_PREFIX}post - WHERE postid NOT IN (SELECT firstpostid FROM #{TABLE_PREFIX}thread) - ORDER BY postid + posts = mysql_query(<<-SQL + SELECT p.postid, p.userid, p.threadid, p.pagetext raw, p.dateline, p.visible, p.parentid + FROM #{TABLE_PREFIX}post p + JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid + WHERE t.firstpostid <> p.postid + AND p.postid > #{last_post_id} + ORDER BY p.postid LIMIT #{BATCH_SIZE} - OFFSET #{offset} SQL + ).to_a - break if posts.size < 1 - next if all_records_exist? :posts, posts.map {|p| p["postid"] } + break if posts.empty? + + last_post_id = posts[-1]["postid"] + posts.reject! { |p| @lookup.post_already_imported?(p["postid"].to_i) } create_posts(posts, total: post_count, offset: offset) do |post| raw = preprocess_post_raw(post["raw"]) rescue nil @@ -374,16 +394,17 @@ EOM WHERE a.attachmentid = #{attachment_id}" results = mysql_query(sql) - unless (row = results.first) + unless row = results.first puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}" - return nil + return end filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach") unless File.exists?(filename) puts "Attachment file doesn't exist: #{filename}" - return nil + return end + real_filename = row['filename'] real_filename.prepend SecureRandom.hex if real_filename[0] == '.' upload = create_upload(post.user.id, filename, real_filename) @@ -391,15 +412,14 @@ EOM if upload.nil? || !upload.valid? puts "Upload not valid :(" puts upload.errors.inspect if upload - return nil + return end - return upload, real_filename + [upload, real_filename] rescue Mysql2::Error => e puts "SQL Error" puts e.message puts sql - return nil end @@ -408,17 +428,22 @@ EOM topic_count = mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"] - batches(BATCH_SIZE) do |offset| - private_messages = mysql_query <<-SQL - SELECT pmtextid, fromuserid, title, message, touserarray, dateline - FROM #{TABLE_PREFIX}pmtext - ORDER BY pmtextid - LIMIT #{BATCH_SIZE} - OFFSET #{offset} - SQL + last_private_message_id = -1 - break if private_messages.size < 1 - next if all_records_exist? :posts, private_messages.map {|pm| "pm-#{pm['pmtextid']}" } + batches(BATCH_SIZE) do |offset| + private_messages = mysql_query(<<-SQL + SELECT pmtextid, fromuserid, title, message, touserarray, dateline + FROM #{TABLE_PREFIX}pmtext + WHERE pmtextid > #{last_private_message_id} + ORDER BY pmtextid + LIMIT #{BATCH_SIZE} + SQL + ).to_a + + break if private_messages.empty? + + last_private_message_id = private_messages[-1]["pmtextid"] + private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm['pmtextid']}") } title_username_of_pm_first_post = {} @@ -476,12 +501,13 @@ EOM if title =~ /^Re:/ - parent_id = title_username_of_pm_first_post[[title[3..-1], participants]] - parent_id = title_username_of_pm_first_post[[title[4..-1], participants]] unless parent_id - parent_id = title_username_of_pm_first_post[[title[5..-1], participants]] unless parent_id - parent_id = title_username_of_pm_first_post[[title[6..-1], participants]] unless parent_id - parent_id = title_username_of_pm_first_post[[title[7..-1], participants]] unless parent_id - parent_id = title_username_of_pm_first_post[[title[8..-1], participants]] unless parent_id + parent_id = title_username_of_pm_first_post[[title[3..-1], participants]] || + title_username_of_pm_first_post[[title[4..-1], participants]] || + title_username_of_pm_first_post[[title[5..-1], participants]] || + title_username_of_pm_first_post[[title[6..-1], participants]] || + title_username_of_pm_first_post[[title[7..-1], participants]] || + title_username_of_pm_first_post[[title[8..-1], participants]] + if parent_id if t = topic_lookup_from_imported_post_id("pm-#{parent_id}") topic_id = t[:topic_id] @@ -496,7 +522,7 @@ EOM mapped[:archetype] = Archetype.private_message mapped[:target_usernames] = target_usernames.join(',') - if mapped[:target_usernames].empty? # pm with yourself? + if mapped[:target_usernames].size < 1 # pm with yourself? # skip = true mapped[:target_usernames] = "system" puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})" @@ -515,7 +541,14 @@ EOM puts '', 'importing attachments...' current_count = 0 - total_count = mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}post WHERE postid NOT IN (SELECT firstpostid FROM #{TABLE_PREFIX}thread)").first["count"] + + total_count = mysql_query(<<-SQL + SELECT COUNT(postid) count + FROM #{TABLE_PREFIX}post p + JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid + WHERE t.firstpostid <> p.postid + SQL + ).first["count"] success_count = 0 fail_count = 0