DEV: make sure we don't load all data into memory when exporting chat messages (#22276)
This commit makes sure we don't load all data into memory when doing CSV exports.
The most important change here made to the recently introduced export of chat
messages (3ea31f4
). We were loading all data into memory in the first version, with
this commit it's not the case anymore.
Speaking of old exports. Some of them already use find_each, and it worked as
expected, without loading all data into memory. And it will proceed working as
expected after this commit.
In general, I made sure this change didn't break other CSV exports, first manually, and
then by writing system specs for them. Sadly, I haven't managed yet to make those
specs stable, they work fine locally, but flaky in GitHub actions, so I've disabled them
for now.
I'll be making more changes to the CSV exports code soon, those system specs will be
very helpful. I'll be running them locally, and I hope I'll manage to make them stable
while doing that work.
This commit is contained in:
parent
b7404373cf
commit
fbe0e4c78c
|
@ -86,25 +86,9 @@ module Jobs
|
|||
|
||||
filename = entities[0][:filename] # use first entity as a name for this export
|
||||
user_export = UserExport.create(file_name: filename, user_id: @current_user.id)
|
||||
|
||||
filename = "#{filename}-#{user_export.id}"
|
||||
dirname = "#{UserExport.base_directory}/#{filename}"
|
||||
|
||||
# ensure directory exists
|
||||
FileUtils.mkdir_p(dirname) unless Dir.exist?(dirname)
|
||||
# Generate a compressed CSV file
|
||||
begin
|
||||
entities.each do |entity|
|
||||
CSV.open("#{dirname}/#{entity[:filename]}.csv", "w") do |csv|
|
||||
csv << get_header(entity[:name]) if entity[:name] != "report"
|
||||
public_send(entity[:method]).each { |d| csv << d }
|
||||
end
|
||||
end
|
||||
|
||||
zip_filename = Compression::Zip.new.compress(UserExport.base_directory, filename)
|
||||
ensure
|
||||
FileUtils.rm_rf(dirname)
|
||||
end
|
||||
zip_filename = write_to_csv_and_zip(filename, entities)
|
||||
|
||||
# create upload
|
||||
upload = nil
|
||||
|
@ -476,5 +460,22 @@ module Jobs
|
|||
|
||||
post
|
||||
end
|
||||
|
||||
def write_to_csv_and_zip(filename, entities)
|
||||
dirname = "#{UserExport.base_directory}/#{filename}"
|
||||
FileUtils.mkdir_p(dirname) unless Dir.exist?(dirname)
|
||||
begin
|
||||
entities.each do |entity|
|
||||
CSV.open("#{dirname}/#{entity[:filename]}.csv", "w") do |csv|
|
||||
csv << get_header(entity[:name]) if entity[:name] != "report"
|
||||
public_send(entity[:method]) { |d| csv << d }
|
||||
end
|
||||
end
|
||||
|
||||
Compression::Zip.new.compress(UserExport.base_directory, filename)
|
||||
ensure
|
||||
FileUtils.rm_rf(dirname)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -8,27 +8,30 @@ module Chat
|
|||
Chat::Message
|
||||
.unscoped
|
||||
.where(created_at: 6.months.ago..Time.current)
|
||||
.joins(:chat_channel)
|
||||
.joins(:user)
|
||||
.joins("INNER JOIN users last_editors ON chat_messages.last_editor_id = last_editors.id")
|
||||
.order(:created_at)
|
||||
.includes(:chat_channel)
|
||||
.includes(:user)
|
||||
.includes(:last_editor)
|
||||
.limit(LIMIT)
|
||||
.pluck(
|
||||
"chat_messages.id",
|
||||
"chat_channels.id",
|
||||
"chat_channels.name",
|
||||
"users.id",
|
||||
"users.username",
|
||||
"chat_messages.message",
|
||||
"chat_messages.cooked",
|
||||
"chat_messages.created_at",
|
||||
"chat_messages.updated_at",
|
||||
"chat_messages.deleted_at",
|
||||
"chat_messages.in_reply_to_id",
|
||||
"last_editors.id",
|
||||
"last_editors.username",
|
||||
.find_each do |chat_message|
|
||||
yield(
|
||||
[
|
||||
chat_message.id,
|
||||
chat_message.chat_channel.id,
|
||||
chat_message.chat_channel.name,
|
||||
chat_message.user.id,
|
||||
chat_message.user.username,
|
||||
chat_message.message,
|
||||
chat_message.cooked,
|
||||
chat_message.created_at,
|
||||
chat_message.updated_at,
|
||||
chat_message.deleted_at,
|
||||
chat_message.in_reply_to&.id,
|
||||
chat_message.last_editor&.id,
|
||||
chat_message.last_editor&.username,
|
||||
]
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
def get_header(entity)
|
||||
if entity === "chat_message"
|
||||
|
|
|
@ -22,7 +22,8 @@ describe Chat::MessagesExporter do
|
|||
it "exports messages" do
|
||||
exporter = Class.new.extend(Chat::MessagesExporter)
|
||||
|
||||
result = exporter.chat_message_export.to_a
|
||||
result = []
|
||||
exporter.chat_message_export { |data_row| result << data_row }
|
||||
|
||||
expect(result.length).to be(7)
|
||||
assert_exported_message(result[0], public_channel_message_1)
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe "Chat CSV exports", type: :system do
|
||||
fab!(:admin) { Fabricate(:admin) }
|
||||
let(:csv_export_pm_page) { PageObjects::Pages::CSVExportPM.new }
|
||||
|
||||
before do
|
||||
Jobs.run_immediately!
|
||||
sign_in(admin)
|
||||
chat_system_bootstrap
|
||||
end
|
||||
|
||||
xit "exports chat messages" do
|
||||
message = Fabricate(:chat_message)
|
||||
|
||||
visit "/admin/plugins/chat"
|
||||
click_button "Create export"
|
||||
|
||||
visit "/u/#{admin.username}/messages"
|
||||
click_link "[Chat Message] Data export complete"
|
||||
expect(csv_export_pm_page).to have_download_link
|
||||
exported_data = csv_export_pm_page.download_and_extract
|
||||
|
||||
expect(exported_data.first).to eq(
|
||||
%w[
|
||||
id
|
||||
chat_channel_id
|
||||
chat_channel_name
|
||||
user_id
|
||||
username
|
||||
message
|
||||
cooked
|
||||
created_at
|
||||
updated_at
|
||||
deleted_at
|
||||
in_reply_to_id
|
||||
last_editor_id
|
||||
last_editor_username
|
||||
],
|
||||
)
|
||||
|
||||
time_format = "%Y-%m-%d %H:%M:%S UTC"
|
||||
expect(exported_data.second).to eq(
|
||||
[
|
||||
message.id.to_s,
|
||||
message.chat_channel.id.to_s,
|
||||
message.chat_channel.name,
|
||||
message.user.id.to_s,
|
||||
message.user.username,
|
||||
message.message,
|
||||
message.cooked,
|
||||
message.created_at.strftime(time_format),
|
||||
message.updated_at.strftime(time_format),
|
||||
nil,
|
||||
nil,
|
||||
message.last_editor.id.to_s,
|
||||
message.last_editor.username,
|
||||
],
|
||||
)
|
||||
ensure
|
||||
csv_export_pm_page.clear_downloads
|
||||
end
|
||||
end
|
|
@ -77,6 +77,7 @@ end
|
|||
# in spec/support/ and its subdirectories.
|
||||
Dir[Rails.root.join("spec/support/**/*.rb")].each { |f| require f }
|
||||
|
||||
Dir[Rails.root.join("spec/system/helpers/**/*.rb")].each { |f| require f }
|
||||
Dir[Rails.root.join("spec/system/page_objects/**/base.rb")].each { |f| require f }
|
||||
Dir[Rails.root.join("spec/system/page_objects/**/*.rb")].each { |f| require f }
|
||||
|
||||
|
@ -286,6 +287,7 @@ RSpec.configure do |config|
|
|||
.tap do |options|
|
||||
apply_base_chrome_options(options)
|
||||
options.add_argument("--window-size=1400,1400")
|
||||
options.add_preference("download.default_directory", Downloads::FOLDER)
|
||||
end
|
||||
|
||||
Capybara.register_driver :selenium_chrome do |app|
|
||||
|
@ -348,6 +350,7 @@ RSpec.configure do |config|
|
|||
|
||||
config.after(:suite) do
|
||||
FileUtils.remove_dir(concurrency_safe_tmp_dir, true) if SpecSecureRandom.value
|
||||
Downloads.clear
|
||||
end
|
||||
|
||||
config.around :each do |example|
|
||||
|
|
|
@ -0,0 +1,314 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe "CSV Exports", type: :system do
|
||||
fab!(:admin) { Fabricate(:admin) }
|
||||
let(:csv_export_pm_page) { PageObjects::Pages::CSVExportPM.new }
|
||||
|
||||
time_format = "%Y-%m-%d %H:%M:%S UTC"
|
||||
|
||||
before do
|
||||
Jobs.run_immediately!
|
||||
sign_in(admin)
|
||||
end
|
||||
|
||||
context "with user list" do
|
||||
fab!(:group1) { Fabricate(:group) }
|
||||
fab!(:group2) { Fabricate(:group) }
|
||||
fab!(:user) do
|
||||
Fabricate(
|
||||
:user,
|
||||
title: "dr",
|
||||
last_seen_at: Time.now,
|
||||
last_posted_at: Time.now,
|
||||
last_emailed_at: Time.now,
|
||||
approved: true,
|
||||
suspended_at: Time.now,
|
||||
suspended_till: Time.now,
|
||||
silenced_till: Time.now,
|
||||
admin: true,
|
||||
moderator: true,
|
||||
staged: true,
|
||||
group_ids: [group1.id, group2.id],
|
||||
)
|
||||
end
|
||||
let(:second_email) { "second_email@discourse.org" }
|
||||
let(:third_email) { "third_email@discourse.org" }
|
||||
|
||||
before do
|
||||
user.user_emails.create!(email: second_email)
|
||||
user.user_emails.create!(email: third_email)
|
||||
|
||||
user.user_stat.topics_entered = 111
|
||||
user.user_stat.posts_read_count = 112
|
||||
user.user_stat.time_read = 113
|
||||
user.user_stat.topic_count = 114
|
||||
user.user_stat.post_count = 115
|
||||
user.user_stat.likes_given = 116
|
||||
user.user_stat.likes_received = 117
|
||||
user.user_stat.save!
|
||||
|
||||
user.user_profile.location = "Tbilisi"
|
||||
user.user_profile.website = "https://www.discourse.org"
|
||||
user.user_profile.views = 5
|
||||
user.user_profile.save!
|
||||
end
|
||||
|
||||
xit "exports data" do
|
||||
visit "admin/users/list/active"
|
||||
click_button "Export"
|
||||
|
||||
visit "/u/#{admin.username}/messages"
|
||||
click_link "[User List] Data export complete"
|
||||
expect(csv_export_pm_page).to have_download_link
|
||||
exported_data = csv_export_pm_page.download_and_extract
|
||||
|
||||
expect(exported_data.length).to be(5)
|
||||
expect(exported_data.first).to eq(
|
||||
%w[
|
||||
id
|
||||
name
|
||||
username
|
||||
email
|
||||
title
|
||||
created_at
|
||||
last_seen_at
|
||||
last_posted_at
|
||||
last_emailed_at
|
||||
trust_level
|
||||
approved
|
||||
suspended_at
|
||||
suspended_till
|
||||
silenced_till
|
||||
active
|
||||
admin
|
||||
moderator
|
||||
ip_address
|
||||
staged
|
||||
secondary_emails
|
||||
topics_entered
|
||||
posts_read_count
|
||||
time_read
|
||||
topic_count
|
||||
post_count
|
||||
likes_given
|
||||
likes_received
|
||||
location
|
||||
website
|
||||
views
|
||||
group_names
|
||||
],
|
||||
)
|
||||
expect(exported_data.last).to eq(
|
||||
[
|
||||
user.id.to_s,
|
||||
user.name,
|
||||
user.username,
|
||||
user.email,
|
||||
user.title,
|
||||
user.created_at.strftime(time_format),
|
||||
user.last_seen_at.strftime(time_format),
|
||||
user.last_posted_at.strftime(time_format),
|
||||
user.last_emailed_at.strftime(time_format),
|
||||
user.trust_level.to_s,
|
||||
user.approved.to_s,
|
||||
user.suspended_at.strftime(time_format),
|
||||
user.suspended_till.strftime(time_format),
|
||||
user.silenced_till.strftime(time_format),
|
||||
user.active.to_s,
|
||||
user.admin.to_s,
|
||||
user.moderator.to_s,
|
||||
user.ip_address.to_s,
|
||||
user.staged.to_s,
|
||||
"#{second_email};#{third_email}",
|
||||
user.user_stat.topics_entered.to_s,
|
||||
user.user_stat.posts_read_count.to_s,
|
||||
user.user_stat.time_read.to_s,
|
||||
user.user_stat.topic_count.to_s,
|
||||
user.user_stat.post_count.to_s,
|
||||
user.user_stat.likes_given.to_s,
|
||||
user.user_stat.likes_received.to_s,
|
||||
user.user_profile.location,
|
||||
user.user_profile.website,
|
||||
user.user_profile.views.to_s,
|
||||
"#{group1.name};#{group2.name}",
|
||||
],
|
||||
)
|
||||
ensure
|
||||
csv_export_pm_page.clear_downloads
|
||||
end
|
||||
end
|
||||
|
||||
context "with stuff actions log" do
|
||||
fab!(:user_history) do
|
||||
Fabricate(
|
||||
:user_history,
|
||||
acting_user: admin,
|
||||
action: UserHistory.actions[:change_site_setting],
|
||||
subject: "default_trust_level",
|
||||
details: "details",
|
||||
context: "context",
|
||||
)
|
||||
end
|
||||
|
||||
xit "exports data" do
|
||||
visit "admin/logs/staff_action_logs"
|
||||
click_button "Export"
|
||||
|
||||
visit "/u/#{admin.username}/messages"
|
||||
click_link "[Staff Action] Data export complete"
|
||||
expect(csv_export_pm_page).to have_download_link
|
||||
exported_data = csv_export_pm_page.download_and_extract
|
||||
|
||||
expect(exported_data.first).to eq(%w[staff_user action subject created_at details context])
|
||||
|
||||
expect(exported_data.last).to eq(
|
||||
[
|
||||
user_history.acting_user.username,
|
||||
"change_site_setting",
|
||||
user_history.subject,
|
||||
user_history.created_at.strftime(time_format),
|
||||
user_history.details,
|
||||
user_history.context,
|
||||
],
|
||||
)
|
||||
ensure
|
||||
csv_export_pm_page.clear_downloads
|
||||
end
|
||||
end
|
||||
|
||||
context "with reports" do
|
||||
before do
|
||||
freeze_time # otherwise the test can fail when ran in midnight
|
||||
Fabricate(:bookmark)
|
||||
end
|
||||
|
||||
xit "exports the Bookmarks report" do
|
||||
visit "admin/reports/bookmarks"
|
||||
click_button "Export"
|
||||
|
||||
visit "/u/#{admin.username}/messages"
|
||||
click_link "[Bookmarks] Data export complete"
|
||||
expect(csv_export_pm_page).to have_download_link
|
||||
exported_data = csv_export_pm_page.download_and_extract
|
||||
|
||||
expect(exported_data.length).to be(2)
|
||||
expect(exported_data.first).to eq(%w[Day Count])
|
||||
expect(exported_data.second).to eq([Time.now.strftime("%Y-%m-%d"), "1"])
|
||||
ensure
|
||||
csv_export_pm_page.clear_downloads
|
||||
end
|
||||
end
|
||||
|
||||
context "with screened emails" do
|
||||
fab!(:screened_email) do
|
||||
Fabricate(
|
||||
:screened_email,
|
||||
action_type: ScreenedEmail.actions[:do_nothing],
|
||||
match_count: 5,
|
||||
last_match_at: Time.now,
|
||||
created_at: Time.now,
|
||||
ip_address: "94.99.101.228",
|
||||
)
|
||||
end
|
||||
|
||||
xit "exports data" do
|
||||
visit "admin/logs/screened_emails"
|
||||
click_button "Export"
|
||||
|
||||
visit "/u/#{admin.username}/messages"
|
||||
click_link "[Screened Email] Data export complete"
|
||||
expect(csv_export_pm_page).to have_download_link
|
||||
exported_data = csv_export_pm_page.download_and_extract
|
||||
|
||||
expect(exported_data.length).to be(2)
|
||||
expect(exported_data.first).to eq(
|
||||
%w[email action match_count last_match_at created_at ip_address],
|
||||
)
|
||||
expect(exported_data.second).to eq(
|
||||
[
|
||||
screened_email.email,
|
||||
"do_nothing",
|
||||
screened_email.match_count.to_s,
|
||||
screened_email.last_match_at.strftime(time_format),
|
||||
screened_email.created_at.strftime(time_format),
|
||||
screened_email.ip_address.to_s,
|
||||
],
|
||||
)
|
||||
ensure
|
||||
csv_export_pm_page.clear_downloads
|
||||
end
|
||||
end
|
||||
|
||||
context "with screened ips" do
|
||||
fab!(:screened_ip) do
|
||||
Fabricate(
|
||||
:screened_ip_address,
|
||||
action_type: ScreenedIpAddress.actions[:do_nothing],
|
||||
match_count: 5,
|
||||
ip_address: "99.232.23.124",
|
||||
last_match_at: Time.now,
|
||||
created_at: Time.now,
|
||||
)
|
||||
end
|
||||
|
||||
xit "exports data" do
|
||||
visit "admin/logs/screened_ip_addresses"
|
||||
click_button "Export"
|
||||
|
||||
visit "/u/#{admin.username}/messages"
|
||||
click_link "[Screened Ip] Data export complete"
|
||||
expect(csv_export_pm_page).to have_download_link
|
||||
exported_data = csv_export_pm_page.download_and_extract
|
||||
|
||||
expect(exported_data.first).to eq(%w[ip_address action match_count last_match_at created_at])
|
||||
expect(exported_data.second).to eq(
|
||||
[
|
||||
screened_ip.ip_address.to_s,
|
||||
"do_nothing",
|
||||
screened_ip.match_count.to_s,
|
||||
screened_ip.last_match_at.strftime(time_format),
|
||||
screened_ip.created_at.strftime(time_format),
|
||||
],
|
||||
)
|
||||
ensure
|
||||
csv_export_pm_page.clear_downloads
|
||||
end
|
||||
end
|
||||
|
||||
context "with screened urls" do
|
||||
fab!(:screened_url) do
|
||||
Fabricate(
|
||||
:screened_url,
|
||||
action_type: ScreenedUrl.actions[:do_nothing],
|
||||
match_count: 5,
|
||||
domain: "https://discourse.org",
|
||||
last_match_at: Time.now,
|
||||
created_at: Time.now,
|
||||
)
|
||||
end
|
||||
|
||||
xit "exports data" do
|
||||
visit "admin/logs/screened_urls"
|
||||
click_button "Export"
|
||||
|
||||
visit "/u/#{admin.username}/messages"
|
||||
click_link "[Screened Url] Data export complete"
|
||||
expect(csv_export_pm_page).to have_download_link
|
||||
exported_data = csv_export_pm_page.download_and_extract
|
||||
|
||||
expect(exported_data.length).to be(2)
|
||||
expect(exported_data.first).to eq(%w[domain action match_count last_match_at created_at])
|
||||
expect(exported_data.second).to eq(
|
||||
[
|
||||
screened_url.domain,
|
||||
"do nothing",
|
||||
screened_url.match_count.to_s,
|
||||
screened_url.last_match_at.strftime(time_format),
|
||||
screened_url.created_at.strftime(time_format),
|
||||
],
|
||||
)
|
||||
ensure
|
||||
csv_export_pm_page.clear_downloads
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,30 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class Downloads
|
||||
FOLDER = Rails.root.join("tmp/downloads")
|
||||
TIMEOUT = 10
|
||||
|
||||
def self.wait_for_download
|
||||
Timeout.timeout(TIMEOUT) { sleep 0.1 until downloaded? }
|
||||
end
|
||||
|
||||
def self.clear
|
||||
FileUtils.rm_rf(FOLDER)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def self.downloaded?
|
||||
!downloading? && downloads.any?
|
||||
end
|
||||
|
||||
def self.downloading?
|
||||
downloads.grep(/\.crdownload$/).any?
|
||||
end
|
||||
|
||||
def self.downloads
|
||||
Dir[FOLDER.join("*")]
|
||||
end
|
||||
|
||||
private_class_method :downloaded?, :downloading?, :downloads
|
||||
end
|
|
@ -0,0 +1,49 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module PageObjects
|
||||
module Pages
|
||||
class CSVExportPM < PageObjects::Pages::Base
|
||||
def initialize
|
||||
super
|
||||
@downloaded_files = []
|
||||
end
|
||||
|
||||
def download_and_extract
|
||||
click_link ".zip"
|
||||
Downloads.wait_for_download
|
||||
|
||||
zip_name = find("a.attachment").text
|
||||
zip_path = File.join(Downloads::FOLDER, zip_name)
|
||||
@downloaded_files << zip_path
|
||||
|
||||
csv_path = unzip(zip_path).first
|
||||
@downloaded_files << csv_path
|
||||
CSV.read(csv_path)
|
||||
end
|
||||
|
||||
def clear_downloads
|
||||
@downloaded_files.each { |file| FileUtils.rm(file) }
|
||||
@downloaded_files = []
|
||||
end
|
||||
|
||||
def has_download_link?
|
||||
find(:link, ".zip").present?
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def unzip(file)
|
||||
paths = []
|
||||
Zip::File.open(file) do |zip_file|
|
||||
zip_file.each do |f|
|
||||
path = File.join(Downloads::FOLDER, f.name)
|
||||
zip_file.extract(f, path)
|
||||
paths << path
|
||||
end
|
||||
end
|
||||
|
||||
paths
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue