DEV: Improve user generic bulk importer anonymization (#27307)
* DEV: Improve user generic bulk importer anonymization. Add support for properly anonymizing: email, date_of_birth, location, website, and bio. * DEV: Remove unneeded anon username check in `import_user_emails`.
This commit is contained in:
parent
c67f810a4b
commit
f2c4474c1e
|
@ -415,17 +415,11 @@ class BulkImport::Generic < BulkImport::Base
|
||||||
end
|
end
|
||||||
|
|
||||||
if row["anonymized"] == 1
|
if row["anonymized"] == 1
|
||||||
while true
|
row["username"] = "anon_#{anon_username_suffix}"
|
||||||
anon_suffix = (SecureRandom.random_number * 100_000_000).to_i
|
|
||||||
break if !@anonymized_user_suffixes.include?(anon_suffix)
|
|
||||||
end
|
|
||||||
|
|
||||||
row["username"] = "anon_#{anon_suffix}"
|
|
||||||
row["email"] = "#{row["username"]}#{UserAnonymizer::EMAIL_SUFFIX}"
|
row["email"] = "#{row["username"]}#{UserAnonymizer::EMAIL_SUFFIX}"
|
||||||
row["name"] = nil
|
row["name"] = nil
|
||||||
row["registration_ip_address"] = nil
|
row["registration_ip_address"] = nil
|
||||||
|
row["date_of_birth"] = nil
|
||||||
@anonymized_user_suffixes << anon_suffix
|
|
||||||
end
|
end
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -455,7 +449,7 @@ class BulkImport::Generic < BulkImport::Base
|
||||||
existing_user_ids = UserEmail.pluck(:user_id).to_set
|
existing_user_ids = UserEmail.pluck(:user_id).to_set
|
||||||
|
|
||||||
users = query(<<~SQL)
|
users = query(<<~SQL)
|
||||||
SELECT id, email, created_at
|
SELECT id, email, created_at, anonymized
|
||||||
FROM users
|
FROM users
|
||||||
ORDER BY id
|
ORDER BY id
|
||||||
SQL
|
SQL
|
||||||
|
@ -464,6 +458,11 @@ class BulkImport::Generic < BulkImport::Base
|
||||||
user_id = user_id_from_imported_id(row["id"])
|
user_id = user_id_from_imported_id(row["id"])
|
||||||
next if user_id && existing_user_ids.include?(user_id)
|
next if user_id && existing_user_ids.include?(user_id)
|
||||||
|
|
||||||
|
if row["anonymized"] == 1
|
||||||
|
username = username_from_id(user_id)
|
||||||
|
row["email"] = "#{username}#{UserAnonymizer::EMAIL_SUFFIX}"
|
||||||
|
end
|
||||||
|
|
||||||
{ user_id: user_id, email: row["email"], created_at: to_datetime(row["created_at"]) }
|
{ user_id: user_id, email: row["email"], created_at: to_datetime(row["created_at"]) }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -474,7 +473,7 @@ class BulkImport::Generic < BulkImport::Base
|
||||||
puts "", "Importing user profiles..."
|
puts "", "Importing user profiles..."
|
||||||
|
|
||||||
users = query(<<~SQL)
|
users = query(<<~SQL)
|
||||||
SELECT id, bio, location
|
SELECT id, bio, location, website, anonymized
|
||||||
FROM users
|
FROM users
|
||||||
ORDER BY id
|
ORDER BY id
|
||||||
SQL
|
SQL
|
||||||
|
@ -485,7 +484,13 @@ class BulkImport::Generic < BulkImport::Base
|
||||||
user_id = user_id_from_imported_id(row["id"])
|
user_id = user_id_from_imported_id(row["id"])
|
||||||
next if user_id && existing_user_ids.include?(user_id)
|
next if user_id && existing_user_ids.include?(user_id)
|
||||||
|
|
||||||
{ user_id: user_id, bio_raw: row["bio"], location: row["location"] }
|
if row["anonymized"] == 1
|
||||||
|
row["bio"] = nil
|
||||||
|
row["location"] = nil
|
||||||
|
row["website"] = nil
|
||||||
|
end
|
||||||
|
|
||||||
|
{ user_id: user_id, bio_raw: row["bio"], location: row["location"], website: row["website"] }
|
||||||
end
|
end
|
||||||
|
|
||||||
users.close
|
users.close
|
||||||
|
@ -2408,6 +2413,16 @@ class BulkImport::Generic < BulkImport::Base
|
||||||
# Converts an integer flag to a boolean: true only when value == 1.
def to_boolean(value)
|
# Converts an integer flag to a boolean: true only when value == 1.
def to_boolean(value)
|
||||||
value == 1
|
value == 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Picks a random numeric suffix for an anonymized username that is not yet
# present in @anonymized_user_suffixes, records it there and returns it.
#
# @return [Integer] a suffix in the range 0...100_000_000
def anon_username_suffix
  loop do
    # An Integer argument makes SecureRandom return an Integer in
    # 0...100_000_000, so no float scaling/truncation is needed.
    suffix = SecureRandom.random_number(100_000_000)

    # Already handed out: draw again.
    next if @anonymized_user_suffixes.include?(suffix)

    # Remember the suffix so later calls cannot return it again.
    @anonymized_user_suffixes << suffix
    return suffix
  end
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Script entry point: builds the importer from the first two command-line
# arguments and starts it.
BulkImport::Generic.new(ARGV[0], ARGV[1]).start
|
# Script entry point: builds the importer from the first two command-line
# arguments and starts it.
BulkImport::Generic.new(ARGV[0], ARGV[1]).start
|
||||||
|
|
Loading…
Reference in New Issue