Merge pull request #4987 from quangbuule/fix-vbulletin-bulk-import

FIX: vBulletin bulk importer: emails and stats
This commit is contained in:
Régis Hanol 2017-07-24 14:55:11 +02:00 committed by GitHub
commit 4dbe0280fb
2 changed files with 112 additions and 3 deletions

View File

@@ -93,6 +93,7 @@ class BulkImport::Base
puts "Loading users indexes..."
@last_user_id = User.unscoped.maximum(:id)
@last_user_email_id = UserEmail.unscoped.maximum(:id)
@emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email").to_set
@usernames_lower = User.unscoped.pluck(:username_lower).to_set
@mapped_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("user_custom_fields.value", "users.username").to_h
@@ -143,6 +144,17 @@ class BulkImport::Base
suspended_at suspended_till last_emailed_at created_at updated_at
}
USER_EMAIL_COLUMNS ||= %i{
id user_id email primary created_at updated_at
}
USER_STAT_COLUMNS ||= %i{
user_id topics_entered time_read days_visited posts_read_count
likes_given likes_received topic_reply_count new_since read_faq
first_post_created_at post_count topic_count bounce_score
reset_bounce_score_after
}
USER_PROFILE_COLUMNS ||= %i{
user_id location website bio_raw bio_cooked views
}
@@ -185,6 +197,8 @@ class BulkImport::Base
end
end
# One-line delegation helpers: each forwards its rows (and optional block)
# to create_records with the matching table name and column list.
# Separator style unified (`;` after the parameter list) to match the
# pre-existing delegates below.
def create_user_emails(rows, &block); create_records(rows, "user_email", USER_EMAIL_COLUMNS, &block); end
def create_user_stats(rows, &block); create_records(rows, "user_stat", USER_STAT_COLUMNS, &block); end
def create_user_profiles(rows, &block); create_records(rows, "user_profile", USER_PROFILE_COLUMNS, &block); end
def create_group_users(rows, &block); create_records(rows, "group_user", GROUP_USER_COLUMNS, &block); end
def create_categories(rows, &block); create_records(rows, "category", CATEGORY_COLUMNS, &block); end
@@ -246,6 +260,38 @@ class BulkImport::Base
user
end
# Normalizes a user_email record for the bulk COPY: assigns the next
# sequential id, maps the imported user id to the local user id, marks it
# primary, and lowercases the address. Invalid or already-seen addresses
# are replaced with random ones until the email is both valid and unique
# (uniqueness is tracked via the @emails set).
def process_user_email(user_email)
  user_email[:id] = @last_user_email_id += 1
  user_email[:user_id] = @users[user_email[:imported_user_id].to_s]
  user_email[:primary] = true
  user_email[:created_at] ||= NOW
  user_email[:updated_at] ||= user_email[:created_at]
  user_email[:email] ||= random_email
  user_email[:email].downcase!
  # keep generating random addresses until the email is valid AND unseen
  user_email[:email] = random_email until user_email[:email] =~ EmailValidator.email_regex && @emails.add?(user_email[:email])
  user_email
end
# Normalizes a user_stat record for the bulk COPY: maps the imported user
# id to the local user id, fills missing counters with 0, and derives
# topic_reply_count from post/topic counts.
#
# Fix: the original computed post_count - topic_count BEFORE applying the
# `||= 0` defaults, so a nil post_count or topic_count raised
# NoMethodError; the defaults now run first and the derived value (which
# can no longer be nil) no longer needs its own `||= 0`.
def process_user_stat(user_stat)
  user_stat[:user_id] = @users[user_stat[:imported_user_id].to_s]
  user_stat[:topics_entered] ||= 0
  user_stat[:time_read] ||= 0
  user_stat[:days_visited] ||= 0
  user_stat[:posts_read_count] ||= 0
  user_stat[:likes_given] ||= 0
  user_stat[:likes_received] ||= 0
  user_stat[:new_since] ||= NOW
  user_stat[:post_count] ||= 0
  user_stat[:topic_count] ||= 0
  user_stat[:bounce_score] ||= 0
  # derive AFTER defaulting so nil counts don't raise NoMethodError
  user_stat[:topic_reply_count] = user_stat[:post_count] - user_stat[:topic_count]
  user_stat
end
def process_user_profile(user_profile)
user_profile[:bio_raw] = (user_profile[:bio_raw].presence || "").scrub.strip.presence
user_profile[:bio_cooked] = pre_cook(user_profile[:bio_raw]) if user_profile[:bio_raw].present?
@@ -428,10 +474,9 @@ class BulkImport::Base
def create_records(rows, name, columns)
start = Time.now
imported_ids = []
process_method_name = "process_#{name}"
sql = "COPY #{name.pluralize} (#{columns.join(",")}) FROM STDIN"
sql = "COPY #{name.pluralize} (#{columns.map {|c| "\"#{c}\""}.join(",")}) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
rows.each do |row|

View File

@@ -18,6 +18,15 @@ class BulkImport::VBulletin < BulkImport::Base
@client = Mysql2::Client.new(host: host, username: username, password: password, database: database)
@client.query_options.merge!(as: :array, cache_rows: false)
@has_post_thanks = mysql_query(<<-SQL
SELECT `COLUMN_NAME`
FROM `INFORMATION_SCHEMA`.`COLUMNS`
WHERE `TABLE_SCHEMA`='#{database}'
AND `TABLE_NAME`='user'
AND `COLUMN_NAME` LIKE 'post_thanks_%'
SQL
).to_a.count > 0
end
def execute
@@ -25,6 +34,9 @@ class BulkImport::VBulletin < BulkImport::Base
import_users
import_group_users
import_user_emails
import_user_stats
import_user_passwords
import_user_salts
import_user_profiles
@@ -73,7 +85,6 @@ class BulkImport::VBulletin < BulkImport::Base
u = {
imported_id: row[0],
username: row[1],
email: row[2],
created_at: Time.zone.at(row[3]),
date_of_birth: parse_birthday(row[4]),
primary_group_id: group_id_from_imported_id(row[6]),
@@ -87,6 +98,59 @@ class BulkImport::VBulletin < BulkImport::Base
end
end
# Streams (userid, email, joindate) for every not-yet-imported vBulletin
# user and bulk-creates the matching user_email records.
def import_user_emails
  puts "Importing user emails..."

  rows = mysql_stream <<~SQL
    SELECT user.userid, email, joindate
    FROM user
    WHERE user.userid > #{@last_imported_user_id}
    ORDER BY user.userid
  SQL

  create_user_emails(rows) do |row|
    {
      imported_id: row[0],
      imported_user_id: row[0],
      email: row[1],
      created_at: Time.zone.at(row[2]),
    }
  end
end
# Streams per-user counters (post count, started-thread count, last post
# date and — when the schema has them — the post_thanks_* like columns)
# and bulk-creates the matching user_stat records.
def import_user_stats
  puts "Importing user stats..."

  rows = mysql_stream <<~SQL
    SELECT user.userid, joindate, posts, COUNT(thread.threadid) AS threads, post.dateline
    #{", post_thanks_user_amount, post_thanks_thanked_times" if @has_post_thanks}
    FROM user
    LEFT OUTER JOIN post ON post.postid = user.lastpostid
    LEFT OUTER JOIN thread ON user.userid = thread.postuserid
    WHERE user.userid > #{@last_imported_user_id}
    GROUP BY user.userid
    ORDER BY user.userid
  SQL

  create_user_stats(rows) do |row|
    stat = {
      imported_id: row[0],
      imported_user_id: row[0],
      new_since: Time.zone.at(row[1]),
      post_count: row[2],
      topic_count: row[3],
      first_post_created_at: row[4] && Time.zone.at(row[4]),
    }
    # the likes columns are only selected when the post_thanks_* schema exists
    if @has_post_thanks
      stat[:likes_given] = row[5]
      stat[:likes_received] = row[6]
    end
    stat
  end
end
def import_group_users
puts "Importing group users..."