DEV: Bulk imports should find existing users by email (#14468)
Without this change, bulk imports unconditionally create new user records even when a user with the same email address exists.
This commit is contained in:
parent
574cb28b0e
commit
a4d0d866aa
|
@ -153,6 +153,7 @@ class BulkImport::Base
|
|||
puts "Loading imported user ids..."
|
||||
@users, imported_user_ids = imported_ids("user")
|
||||
@last_imported_user_id = imported_user_ids.max || -1
|
||||
@pre_existing_user_ids = Set.new
|
||||
|
||||
puts "Loading imported category ids..."
|
||||
@categories, imported_category_ids = imported_ids("category")
|
||||
|
@ -197,7 +198,7 @@ class BulkImport::Base
|
|||
puts "Loading users indexes..."
|
||||
@last_user_id = last_id(User)
|
||||
@last_user_email_id = last_id(UserEmail)
|
||||
@emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email").to_set
|
||||
@emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h
|
||||
@usernames_lower = User.unscoped.pluck(:username_lower).to_set
|
||||
@mapped_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("user_custom_fields.value", "users.username").to_h
|
||||
|
||||
|
@ -393,6 +394,17 @@ class BulkImport::Base
|
|||
end
|
||||
|
||||
def process_user(user)
|
||||
if user[:email].present?
|
||||
user[:email].downcase!
|
||||
|
||||
if existing_user_id = @emails[user[:email]]
|
||||
@pre_existing_user_ids << existing_user_id
|
||||
@users[user[:imported_id].to_i] = existing_user_id
|
||||
user[:skip] = true
|
||||
return user
|
||||
end
|
||||
end
|
||||
|
||||
@users[user[:imported_id].to_i] = user[:id] = @last_user_id += 1
|
||||
|
||||
imported_username = user[:username].dup
|
||||
|
@ -412,11 +424,6 @@ class BulkImport::Base
|
|||
end
|
||||
|
||||
user[:username_lower] = user[:username].downcase
|
||||
user[:email] ||= random_email
|
||||
user[:email].downcase!
|
||||
|
||||
# unique email
|
||||
user[:email] = random_email until user[:email] =~ EmailValidator.email_regex && @emails.add?(user[:email])
|
||||
user[:trust_level] ||= TrustLevel[1]
|
||||
user[:active] = true unless user.has_key?(:active)
|
||||
user[:admin] ||= false
|
||||
|
@ -428,18 +435,28 @@ class BulkImport::Base
|
|||
end
|
||||
|
||||
# Fills in the primary email row for one imported user.
#
# The target user id is looked up in @users by the imported user id. When
# that id is listed in @pre_existing_user_ids, nothing is built and a skip
# marker is returned instead.
#
# @param user_email [Hash] row to complete in place; reads :imported_user_id
#   and the optional :email, :created_at and :updated_at keys. A given
#   :email string is downcased in place.
# @return [Hash] the completed row, or `{ skip: true }` for a user id found
#   in @pre_existing_user_ids
def process_user_email(user_email)
  user_id = @users[user_email[:imported_user_id].to_i]
  return { skip: true } if @pre_existing_user_ids.include?(user_id)

  user_email[:id] = @last_user_email_id += 1
  user_email[:user_id] = user_id
  user_email[:primary] = true
  user_email[:created_at] ||= NOW
  user_email[:updated_at] ||= user_email[:created_at]

  user_email[:email] ||= random_email
  user_email[:email].downcase!

  # unique email
  # Checking @emails alone only catches addresses that were loaded up front;
  # two imported rows sharing a new address would both pass. Addresses handed
  # out here are therefore remembered in their own set. They are deliberately
  # not written into @emails, which other code reads as an email => user id
  # lookup.
  @emails_issued_in_run ||= Set.new
  until user_email[:email] =~ EmailValidator.email_regex &&
        !@emails.has_key?(user_email[:email]) &&
        @emails_issued_in_run.add?(user_email[:email]) # nil when already issued
    user_email[:email] = random_email
  end

  user_email
end
|
||||
|
||||
def process_user_stat(user_stat)
|
||||
user_stat[:user_id] = @users[user_stat[:imported_user_id].to_i]
|
||||
user_id = @users[user_stat[:imported_user_id].to_i]
|
||||
return { skip: true } if @pre_existing_user_ids.include?(user_id)
|
||||
|
||||
user_stat[:user_id] = user_id
|
||||
user_stat[:topics_entered] ||= 0
|
||||
user_stat[:time_read] ||= 0
|
||||
user_stat[:days_visited] ||= 0
|
||||
|
@ -455,6 +472,8 @@ class BulkImport::Base
|
|||
end
|
||||
|
||||
def process_user_profile(user_profile)
|
||||
return { skip: true } if @pre_existing_user_ids.include?(user_profile[:user_id])
|
||||
|
||||
user_profile[:bio_raw] = (user_profile[:bio_raw].presence || "").scrub.strip.presence
|
||||
user_profile[:bio_cooked] = pre_cook(user_profile[:bio_raw]) if user_profile[:bio_raw].present?
|
||||
user_profile[:views] ||= 0
|
||||
|
@ -697,7 +716,7 @@ class BulkImport::Base
|
|||
processed = send(process_method_name, mapped)
|
||||
imported_ids << mapped[:imported_id] unless mapped[:imported_id].nil?
|
||||
imported_ids |= mapped[:imported_ids] unless mapped[:imported_ids].nil?
|
||||
@raw_connection.put_copy_data columns.map { |c| processed[c] }
|
||||
@raw_connection.put_copy_data columns.map { |c| processed[c] } unless processed[:skip]
|
||||
print "\r%7d - %6d/sec" % [imported_ids.size, imported_ids.size.to_f / (Time.now - start)] if imported_ids.size % 5000 == 0
|
||||
rescue => e
|
||||
puts "\n"
|
||||
|
|
|
@ -83,6 +83,7 @@ class BulkImport::PhpBB < BulkImport::Base
|
|||
u = {
|
||||
imported_id: row["user_id"],
|
||||
username: normalize_text(row["username"]),
|
||||
email: row["user_email"],
|
||||
created_at: Time.zone.at(row["user_regdate"].to_i),
|
||||
last_seen_at: row["user_lastvisit"] == 0 ? Time.zone.at(row["user_regdate"].to_i) : Time.zone.at(row["user_lastvisit"].to_i),
|
||||
trust_level: row["user_posts"] == 0 ? TrustLevel[0] : TrustLevel[1],
|
||||
|
|
|
@ -118,6 +118,7 @@ class BulkImport::VBulletin < BulkImport::Base
|
|||
imported_id: row[0],
|
||||
username: normalize_text(row[1]),
|
||||
name: normalize_text(row[1]),
|
||||
email: row[2],
|
||||
created_at: Time.zone.at(row[3]),
|
||||
date_of_birth: parse_birthday(row[4]),
|
||||
primary_group_id: group_id_from_imported_id(row[6]),
|
||||
|
|
Loading…
Reference in New Issue