# frozen_string_literal: true

# discourse/app/jobs/regular/export_csv_file.rb
require "csv"
module Jobs
class ExportCsvFile < ::Jobs::Base
2014-08-09 06:28:57 -04:00
# A failed export is not retried by Sidekiq.
sidekiq_options(retry: false)

# State set by #execute and read by the individual *_export methods.
attr_accessor :extra, :current_user, :entity
# CSV column names for each export, keyed by entity name (string or symbol
# lookups both work through HashWithIndifferentAccess).
# `||=` leaves an already-defined constant untouched.
HEADER_ATTRS_FOR ||=
  HashWithIndifferentAccess.new(
    user_list: %w[
      id
      name
      username
      email
      title
      created_at
      last_seen_at
      last_posted_at
      last_emailed_at
      trust_level
      approved
      suspended_at
      suspended_till
      silenced_till
      active
      admin
      moderator
      ip_address
      staged
      secondary_emails
    ],
    user_stats: %w[
      topics_entered
      posts_read_count
      time_read
      topic_count
      post_count
      likes_given
      likes_received
    ],
    user_profile: %w[location website views],
    user_sso: %w[
      external_id
      external_email
      external_username
      external_name
      external_avatar_url
    ],
    staff_action: %w[staff_user action subject created_at details context],
    screened_email: %w[email action match_count last_match_at created_at ip_address],
    screened_ip: %w[ip_address action match_count last_match_at created_at],
    screened_url: %w[domain action match_count last_match_at created_at],
    report: %w[date value],
  )
2014-08-09 06:28:57 -04:00
# Builds the requested export, zips it, stores the archive as an upload and
# notifies the requesting user of the outcome.
#
# The method-level `ensure` always runs, so a success or failure message is
# attempted even when an earlier step raises.
#
# @param args [Hash] job arguments read here:
#   - :entity  export name; "<entity>_export" must be a method of this job
#   - :args    optional extra options, stored in @extra with indifferent access
#   - :user_id id of the user the export is created for
# @raise [Discourse::InvalidParameters] when no "<entity>_export" method exists
def execute(args)
  @entity = args[:entity]
  @extra = HashWithIndifferentAccess.new(args[:args]) if args[:args]
  @current_user = User.find_by(id: args[:user_id])

  entity = { name: @entity }
  entity[:method] = :"#{entity[:name]}_export"
  raise Discourse::InvalidParameters.new(:entity) unless respond_to?(entity[:method])

  @timestamp ||= Time.now.strftime("%y%m%d-%H%M%S")

  # Reports are named after the specific report rather than the generic entity.
  report_name = @extra[:name] if entity[:name] == "report" && @extra[:name].present?
  entity[:filename] = "#{(report_name || entity[:name]).dasherize}-#{@timestamp}"
  export_title =
    if report_name
      I18n.t("reports.#{report_name}.title")
    else
      @entity.gsub("_", " ").titleize
    end

  filename = entity[:filename]
  user_export = UserExport.create(file_name: filename, user_id: @current_user.id)
  filename = "#{filename}-#{user_export.id}"

  zip_filename = write_to_csv_and_zip(filename, entity)

  # create upload
  upload = nil
  if File.exist?(zip_filename)
    begin
      File.open(zip_filename) do |file|
        upload =
          UploadCreator.new(
            file,
            File.basename(zip_filename),
            type: "csv_export",
            for_export: "true",
          ).create_for(@current_user.id)

        if upload.persisted?
          user_export.update_columns(upload_id: upload.id)
        else
          Rails.logger.warn(
            "Failed to upload the file #{zip_filename}: #{upload.errors.full_messages}",
          )
        end
      end
    ensure
      # Remove the local archive even when creating the upload raises.
      File.delete(zip_filename)
    end
  end
ensure
  post = notify_user(upload, export_title)

  if user_export.present? && post.present?
    topic = post.topic
    user_export.update_columns(topic_id: topic.id)
    topic.update_status("closed", true, Discourse.system_user)
  end
end
2015-01-02 01:59:05 -05:00
# Yields one CSV row (an Array) per user.
#
# When @extra[:trust_level] names a known trust level, only users at that
# level are exported; otherwise all users are. Each row is the base user
# columns, then the single-sign-on columns (only when
# SiteSetting.enable_discourse_connect is on), then custom user fields, then
# group names.
#
# @yieldparam user_info_array [Array] the row for one user
def user_list_export
  user_field_ids = UserField.pluck(:id)

  requested_level = @extra && @extra[:trust_level]
  trust_level = TrustLevel.levels[requested_level.to_sym] if requested_level
  condition = trust_level ? { trust_level: trust_level } : {}

  # Read the setting once so every row has the same set of columns.
  sso_enabled = SiteSetting.enable_discourse_connect

  includes = %i[user_profile user_stat groups user_emails]
  includes << :single_sign_on_record if sso_enabled

  User
    .where(condition)
    .includes(*includes)
    .find_each do |user|
      user_info_array = get_base_user_array(user)
      user_info_array = add_single_sign_on(user, user_info_array) if sso_enabled
      user_info_array = add_custom_fields(user, user_info_array, user_field_ids)
      user_info_array = add_group_names(user, user_info_array)
      yield user_info_array
    end
end
2014-12-06 23:15:22 -05:00
# Yields one CSV row per staff action, iterating in descending batch order.
# Non-admin users only get records where `admin_only` is false.
#
# @yieldparam row [Array] fields built by get_staff_action_fields
def staff_action_export
  staff_action_data =
    if @current_user.admin?
      UserHistory.only_staff_actions
    else
      UserHistory.where(admin_only: false).only_staff_actions
    end

  staff_action_data.find_each(order: :desc) do |staff_action|
    yield get_staff_action_fields(staff_action)
  end
end
2014-12-06 23:15:22 -05:00
# Yields one CSV row per screened email, iterating in descending batch order.
#
# @yieldparam row [Array] fields built by get_screened_email_fields
def screened_email_export
  ScreenedEmail.find_each(order: :desc) do |screened_email|
    yield get_screened_email_fields(screened_email)
  end
end
# Yields one CSV row per screened IP address, iterating in descending batch order.
#
# @yieldparam row [Array] fields built by get_screened_ip_fields
def screened_ip_export
  ScreenedIpAddress.find_each(order: :desc) { |record| yield get_screened_ip_fields(record) }
end
2014-12-06 23:15:22 -05:00
# Yields one CSV row per screened domain. Records are aggregated per domain
# (summed match count, latest match, earliest creation) and ordered by the
# most recent match first.
#
# @yieldparam row [Array] fields built by get_screened_url_fields
def screened_url_export
  ScreenedUrl
    .select(
      "domain, sum(match_count) as match_count, max(last_match_at) as last_match_at, min(created_at) as created_at",
    )
    .group(:domain)
    .order("last_match_at DESC")
    .each { |screened_url| yield get_screened_url_fields(screened_url) }
end
# Yields the rows of the report named by @extra[:name]: first a header row of
# column titles, then one row of stringified values per data entry.
#
# String dates in @extra are normalised to the start/end of day, and the
# category, group and include_subcategories options are copied into
# @extra[:filters] before the report is looked up.
#
# @yieldparam row [Array] header titles first, then data rows
def report_export
  # Date strings that cannot be parsed are treated as `nil`.
  { start_date: :beginning_of_day, end_date: :end_of_day }.each do |key, day_boundary|
    raw_value = @extra[key]
    next unless raw_value.is_a?(String)

    @extra[key] = begin
      raw_value.to_date.public_send(day_boundary)
    rescue StandardError
      nil
    end
  end

  @extra[:filters] = {}
  filters = @extra[:filters]
  %i[category group].each do |filter_name|
    filters[filter_name] = @extra[filter_name].to_i if @extra[filter_name].present?
  end
  if @extra[:include_subcategories].present?
    filters[:include_subcategories] = !!ActiveRecord::Type::Boolean.new.cast(
      @extra[:include_subcategories],
    )
  end

  report = Report.find(@extra[:name], @extra)

  header = []
  titles = {}
  report.labels.each do |label|
    # User and topic labels keep their column key under :properties.
    column =
      case label[:type]
      when :user
        label[:properties][:username]
      when :topic
        label[:properties][:id]
      else
        label[:property]
      end
    titles[column] = label[:title]
    header << column
  end

  rows =
    if report.modes == [:stacked_chart]
      # Pivot the series so each x value becomes one row with a column per series.
      header = [:x]
      rows_by_x = {}
      report.data.each do |series|
        header << series[:label]
        series[:data].each do |point|
          (rows_by_x[point[:x]] ||= { x: point[:x] })[series[:label]] = point[:y]
        end
      end
      rows_by_x.values
    else
      report.data
    end

  yield header.map { |column| titles[column] || column }
  rows.each { |row| yield row.values_at(*header).map(&:to_s) }
end
# Returns the CSV header row for the given export.
#
# For "user_list" the header is assembled from the user, stats and profile
# columns, the single-sign-on columns (only when
# SiteSetting.enable_discourse_connect is on), one column per UserField and a
# trailing "group_names" column. Every other entity uses its HEADER_ATTRS_FOR
# entry as-is.
#
# @param entity [String] export name
# @return [Array<String>, nil] header columns (nil for an unknown entity)
def get_header(entity)
  return HEADER_ATTRS_FOR[entity] unless entity == "user_list"

  # `+` builds a new array, so the constant's arrays are never mutated below.
  header_array =
    HEADER_ATTRS_FOR["user_list"] + HEADER_ATTRS_FOR["user_stats"] +
      HEADER_ATTRS_FOR["user_profile"]

  header_array.concat(HEADER_ATTRS_FOR["user_sso"]) if SiteSetting.enable_discourse_connect

  UserField.all.each do |custom_field|
    header_array.push("#{custom_field.name} (custom user field)")
  end

  header_array.push("group_names")
  header_array
end
2014-08-09 06:28:57 -04:00
private
2016-07-19 02:43:50 -04:00
# Wraps a string in double quotes when it contains a comma; any other value
# (including nil) is returned unchanged.
#
# @param string [String, nil]
# @return [String, nil]
def escape_comma(string)
  string&.include?(",") ? %Q|"#{string}"| : string
end
2016-07-19 02:43:50 -04:00
# Builds the base CSV row for a user: identity columns, account columns,
# secondary emails joined with ";", then the user_stat and user_profile
# columns.
#
# @param user [User] record responding to user_emails, user_stat and user_profile
# @return [Array] row values in header order
def get_base_user_array(user)
  # Emails are split in Ruby instead of through scopes (preloading scopes is hard).
  primary_records, secondary_records = user.user_emails.partition(&:primary?)
  primary_email = primary_records.last&.email
  secondary_emails = secondary_records.map(&:email)

  stats = user.user_stat
  profile = user.user_profile

  account_columns = %i[
    created_at
    last_seen_at
    last_posted_at
    last_emailed_at
    trust_level
    approved
    suspended_at
    suspended_till
    silenced_till
    active
    admin
    moderator
    ip_address
    staged
  ]
  stat_columns = %i[
    topics_entered
    posts_read_count
    time_read
    topic_count
    post_count
    likes_given
    likes_received
  ]

  row = [user.id, escape_comma(user.name), user.username, primary_email, escape_comma(user.title)]
  row.concat(account_columns.map { |column| user.public_send(column) })
  row << secondary_emails.join(";")
  row.concat(stat_columns.map { |column| stats.public_send(column) })
  row.push(escape_comma(profile.location), profile.website, profile.views)
  row
end
2016-07-19 02:43:50 -04:00
# Appends the five single-sign-on columns to the row, in place. Users without
# a single_sign_on_record get five nils so the columns stay aligned.
#
# @param user [User] record responding to single_sign_on_record
# @param user_info_array [Array] row being built (mutated)
# @return [Array] the same row
def add_single_sign_on(user, user_info_array)
  sso_record = user.single_sign_on_record

  if sso_record
    user_info_array.push(
      sso_record.external_id,
      sso_record.external_email,
      sso_record.external_username,
      escape_comma(sso_record.external_name),
      sso_record.external_avatar_url,
    )
  else
    user_info_array.push(nil, nil, nil, nil, nil)
  end

  user_info_array
end
2016-07-19 02:43:50 -04:00
# Appends the user's custom field values to the row, in place. Nothing is
# added when no user field ids are given.
#
# @param user [User] record responding to user_fields, iterated as key/value pairs
# @param user_info_array [Array] row being built (mutated)
# @param user_field_ids [Array, nil] ids of the defined user fields
# @return [Array] the same row
def add_custom_fields(user, user_info_array, user_field_ids)
  if user_field_ids.present?
    user.user_fields.each { |_field_id, value| user_info_array << escape_comma(value) }
  end

  user_info_array
end
2016-07-19 02:43:50 -04:00
# Appends one column holding the user's group names joined with ";", or nil
# when the user has no groups. The row is modified in place.
#
# @param user [User] record responding to groups
# @param user_info_array [Array] row being built (mutated)
# @return [Array] the same row
def add_group_names(user, user_info_array)
  group_names = user.groups.map(&:name).join(";")
  user_info_array << (group_names.present? ? escape_comma(group_names) : nil)
  user_info_array
end
2014-12-06 23:15:22 -05:00
# Builds the CSV row for one staff action, following the column order in
# HEADER_ATTRS_FOR["staff_action"].
#
# - "action" is the name matching the stored action value (empty string when unknown)
# - "staff_user" is the acting user's username, or nil when that user is not found
# - "subject" is prefixed with the target user's username when that user exists
# - every other column is read straight from the record's attributes
#
# @param staff_action [UserHistory] record responding to attributes
# @return [Array] row values
def get_staff_action_fields(staff_action)
  HEADER_ATTRS_FOR["staff_action"].map do |attr|
    case attr
    when "action"
      UserHistory.actions.key(staff_action.attributes[attr]).to_s
    when "staff_user"
      User.find_by(id: staff_action.attributes["acting_user_id"])&.username
    when "subject"
      user = User.find_by(id: staff_action.attributes["target_user_id"])
      subject = staff_action.attributes[attr]
      user.nil? ? subject : "#{user.username} #{subject}"
    else
      staff_action.attributes[attr]
    end
  end
end
# Builds the CSV row for one screened email, following the column order in
# HEADER_ATTRS_FOR["screened_email"]. The "action" column is the action name
# matching the record's action_type; other columns come from its attributes.
#
# @param screened_email [ScreenedEmail] record responding to attributes
# @return [Array] row values
def get_screened_email_fields(screened_email)
  HEADER_ATTRS_FOR["screened_email"].map do |attr|
    if attr == "action"
      ScreenedEmail.actions.key(screened_email.attributes["action_type"]).to_s
    else
      screened_email.attributes[attr]
    end
  end
end
2014-12-06 23:15:22 -05:00
# Builds the CSV row for one screened IP address, following the column order
# in HEADER_ATTRS_FOR["screened_ip"]. The "action" column is the action name
# matching the record's action_type; other columns come from its attributes.
#
# @param screened_ip [ScreenedIpAddress] record responding to attributes
# @return [Array] row values
def get_screened_ip_fields(screened_ip)
  HEADER_ATTRS_FOR["screened_ip"].map do |attr|
    if attr == "action"
      ScreenedIpAddress.actions.key(screened_ip.attributes["action_type"]).to_s
    else
      screened_ip.attributes[attr]
    end
  end
end
2014-12-06 23:15:22 -05:00
# Builds the CSV row for one screened URL, following the column order in
# HEADER_ATTRS_FOR["screened_url"]. The "action" column is the action name
# matching the record's action_type, or "do nothing" when there is none;
# other columns come from the record's attributes.
#
# @param screened_url [ScreenedUrl] record responding to attributes
# @return [Array] row values
def get_screened_url_fields(screened_url)
  HEADER_ATTRS_FOR["screened_url"].map do |attr|
    if attr == "action"
      action = ScreenedUrl.actions.key(screened_url.attributes["action_type"]).to_s
      # Return the action explicitly: a trailing `x = ... if cond` evaluates to
      # nil when the condition is false, which dropped real action names.
      action.blank? ? "do nothing" : action
    else
      screened_url.attributes[attr]
    end
  end
end
# Sends the export result to @current_user as a system message: a success
# message with a download link when the upload exists and has no errors,
# otherwise a failure message. Nothing is sent when @current_user is not set.
#
# @param upload [Upload, nil] the created upload, if any
# @param export_title [String, nil] title passed to the success message
# @return [Post, nil] whatever SystemMessage.create_from_system_user returns, or nil when no user is set
def notify_user(upload, export_title)
  return nil unless @current_user

  if upload&.errors&.empty?
    SystemMessage.create_from_system_user(
      @current_user,
      :csv_export_succeeded,
      download_link: UploadMarkdown.new(upload).attachment_markdown,
      export_title: export_title,
    )
  else
    SystemMessage.create_from_system_user(@current_user, :csv_export_failed)
  end
end
# Writes the export to a CSV file in a working directory under
# UserExport.base_directory, compresses that directory, and removes the
# working directory whether or not writing or compressing succeeded.
#
# @param filename [String] name of the working directory (and of the archive source)
# @param entity [Hash] export description; reads :name, :method and :filename
# @return the result of Compression::Zip#compress
def write_to_csv_and_zip(filename, entity)
  export_dir = "#{UserExport.base_directory}/#{filename}"
  csv_path = "#{export_dir}/#{entity[:filename]}.csv"

  FileUtils.mkdir_p(export_dir) if !Dir.exist?(export_dir)

  begin
    CSV.open(csv_path, "w") do |csv|
      # Reports yield their own header row, so none is written here.
      csv << get_header(entity[:name]) unless entity[:name] == "report"
      public_send(entity[:method]) { |row| csv << row }
    end

    Compression::Zip.new.compress(UserExport.base_directory, filename)
  ensure
    FileUtils.rm_rf(export_dir)
  end
end
2014-08-09 06:28:57 -04:00
end
end