Optimize import script - vanilla_mysql.rb

Paginate with WHERE id > last_id instead of OFFSET, because OFFSET causes performance issues on large tables: the database must still read and discard every skipped row.
This commit is contained in:
Melroy Neil Dsouza 2017-06-19 10:30:55 +05:30 committed by GitHub
parent a23ce56682
commit 420abce549
1 changed file with 15 additions and 12 deletions

View File

@ -52,27 +52,26 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
@user_is_deleted = false @user_is_deleted = false
@last_deleted_username = nil @last_deleted_username = nil
username = nil username = nil
@last_user_id = -1
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count'] total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
results = mysql_query( results = mysql_query(
"SELECT UserID, Name, Title, Location, About, Email, "SELECT UserID, Name, Title, Location, About, Email,
DateInserted, DateLastActive, InsertIPAddress, Admin DateInserted, DateLastActive, InsertIPAddress, Admin
WHERE id > #{@last_user_id}
FROM #{TABLE_PREFIX}User FROM #{TABLE_PREFIX}User
ORDER BY UserID ASC ORDER BY UserID ASC
LIMIT #{BATCH_SIZE} LIMIT #{BATCH_SIZE};")
OFFSET #{offset};")
break if results.size < 1 break if results.size < 1
@last_user_id = results.to_a.last['UserID']
next if all_records_exist? :users, results.map {|u| u['UserID'].to_i} next if all_records_exist? :users, results.map {|u| u['UserID'].to_i}
create_users(results, total: total_count, offset: offset) do |user| create_users(results, total: total_count, offset: offset) do |user|
next if user['Email'].blank? next if user['Email'].blank?
next if user['Name'].blank? next if user['Name'].blank?
next if @lookup.user_id_from_imported_user_id(user['UserID']) next if @lookup.user_id_from_imported_user_id(user['UserID'])
if user['Name'] == '[Deleted User]' if user['Name'] == '[Deleted User]'
# EVERY deleted user record in Vanilla has the same username: [Deleted User] # EVERY deleted user record in Vanilla has the same username: [Deleted User]
# Save our UserNameSuggester some pain: # Save our UserNameSuggester some pain:
@ -198,17 +197,20 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
tag_names_sql = "select t.name as tag_name from GDN_Tag t, GDN_TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';" tag_names_sql = "select t.name as tag_name from GDN_Tag t, GDN_TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';"
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first['count'] total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first['count']
@last_topic_id = -1
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
discussions = mysql_query( discussions = mysql_query(
"SELECT DiscussionID, CategoryID, Name, Body, "SELECT DiscussionID, CategoryID, Name, Body,
DateInserted, InsertUserID DateInserted, InsertUserID
WHERE DiscussionID > #{@last_topic_id}
FROM #{TABLE_PREFIX}Discussion FROM #{TABLE_PREFIX}Discussion
ORDER BY DiscussionID ASC ORDER BY DiscussionID ASC
LIMIT #{BATCH_SIZE} LIMIT #{BATCH_SIZE};")
OFFSET #{offset};")
break if discussions.size < 1 break if discussions.size < 1
@last_topic_id = discussions.to_a.last['DiscussionID']
next if all_records_exist? :posts, discussions.map {|t| "discussion#" + t['DiscussionID'].to_s} next if all_records_exist? :posts, discussions.map {|t| "discussion#" + t['DiscussionID'].to_s}
create_posts(discussions, total: total_count, offset: offset) do |discussion| create_posts(discussions, total: total_count, offset: offset) do |discussion|
@ -234,17 +236,18 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
puts "", "importing posts..." puts "", "importing posts..."
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first['count'] total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first['count']
@last_post_id = -1
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
comments = mysql_query( comments = mysql_query(
"SELECT CommentID, DiscussionID, Body, "SELECT CommentID, DiscussionID, Body,
DateInserted, InsertUserID DateInserted, InsertUserID
WHERE CommentID > #{@last_post_id}
FROM #{TABLE_PREFIX}Comment FROM #{TABLE_PREFIX}Comment
ORDER BY CommentID ASC ORDER BY CommentID ASC
LIMIT #{BATCH_SIZE} LIMIT #{BATCH_SIZE};")
OFFSET #{offset};")
break if comments.size < 1 break if comments.size < 1
@last_post_id = comments.to_a.last['CommentID']
next if all_records_exist? :posts, comments.map {|comment| "comment#" + comment['CommentID'].to_s} next if all_records_exist? :posts, comments.map {|comment| "comment#" + comment['CommentID'].to_s}
create_posts(comments, total: total_count, offset: offset) do |comment| create_posts(comments, total: total_count, offset: offset) do |comment|