2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2017-05-26 16:26:18 -04:00
|
|
|
require_relative "../base"
|
|
|
|
require_relative "support/database"
|
|
|
|
require_relative "support/indexer"
|
|
|
|
require_relative "support/settings"
|
|
|
|
|
|
|
|
module ImportScripts::Mbox
|
|
|
|
class Importer < ImportScripts::Base
|
2020-03-13 18:59:14 -04:00
|
|
|
# Builds the importer from a settings file.
#
# The settings are loaded before +super()+ runs and the database is opened
# afterwards.
# NOTE(review): presumably the base initializer already needs @settings
# (e.g. through get_site_settings_for_import) -- confirm before reordering.
#
# @param settings_filename passed unchanged to Settings.load
def initialize(settings_filename)
  @settings = Settings.load(settings_filename)
  super()

  @database = Database.new(@settings.data_dir, @settings.batch_size)
end
|
|
|
|
|
2017-11-18 07:53:21 -05:00
|
|
|
# Returns the site settings to apply during the import.
#
# Starts from whatever the superclass returns, forces
# +:enable_staged_users+ on and copies the configured +prefer_html+
# value into +:incoming_email_prefer_html+.
#
# @return the settings object returned by +super+, with both keys assigned
def get_site_settings_for_import
  settings = super
  settings[:enable_staged_users] = true
  settings[:incoming_email_prefer_html] = @settings.prefer_html
  settings
end
|
|
|
|
|
|
|
|
protected
|
|
|
|
|
|
|
|
# Runs the import.
#
# Messages are always indexed first. When the +index_only+ setting is on,
# nothing is imported and @skip_updates is set instead. Otherwise tagging
# is switched on (only when tags are configured) and categories, users and
# posts are imported in that order.
def execute
  index_messages

  if @settings.index_only
    @skip_updates = true
  else
    SiteSetting.tagging_enabled = true if @settings.tags.present?

    import_categories
    import_users
    import_posts
  end
end
|
|
|
|
|
|
|
|
# Announces the indexing step and runs an Indexer built from the
# importer's database and settings.
def index_messages
  puts "", "creating index"
  indexer = Indexer.new(@database, @settings)
  indexer.execute
end
|
|
|
|
|
|
|
|
# Creates one category per row returned by the database.
#
# The category name doubles as the import id. A blank
# +parent_category_id+ is passed on as nil.
def import_categories
  puts "", "creating categories"
  rows = @database.fetch_categories

  create_categories(rows) do |row|
    {
      id: row["name"],
      name: row["name"],
      parent_category_id: row["parent_category_id"].presence,
    }
  end
end
|
|
|
|
|
|
|
|
# Creates users batch by batch.
#
# Each call to +fetch_users+ receives the last email returned by the
# previous call and hands back the next rows together with the new last
# email. The loop ends on the first empty batch. Batches whose emails have
# all been imported already are skipped.
#
# The email address is used as the import id. +active+ is the inverse of
# the +staged+ setting.
def import_users
  puts "", "creating users"
  total_count = @database.count_users
  last_email = ""

  batches do |offset|
    rows, last_email = @database.fetch_users(last_email)
    break if rows.empty?

    next if all_records_exist?(:users, rows.map { |row| row["email"] })

    create_users(rows, total: total_count, offset: offset) do |row|
      {
        id: row["email"],
        email: row["email"],
        name: row["name"],
        trust_level: @settings.trust_level,
        staged: @settings.staged,
        active: !@settings.staged,
        created_at: to_time(row["date_of_first_message"]),
      }
    end
  end
end
|
|
|
|
|
|
|
|
# Delegates to the superclass using the configured batch size.
# The caller's block is forwarded implicitly by +super+.
def batches
  super(@settings.batch_size)
end
|
|
|
|
|
|
|
|
# Creates topics and replies batch by batch.
#
# Each call to +fetch_messages+ receives the last row id returned by the
# previous call. The loop ends on the first empty batch, and batches whose
# message ids have all been imported already are skipped.
#
# For every row the block yields either a mapped post or nil:
# - nil (with a log line) when the message has no date,
# - a first post when the message has no +in_reply_to+,
# - a reply otherwise.
# Any error raised while mapping a row is logged and turned into nil so a
# single bad message does not abort the batch.
def import_posts
  puts "", "creating topics and posts"
  total_count = @database.count_messages
  last_row_id = 0

  batches do |offset|
    rows, last_row_id = @database.fetch_messages(last_row_id)
    break if rows.empty?

    next if all_records_exist?(:posts, rows.map { |row| row["msg_id"] })

    create_posts(rows, total: total_count, offset: offset) do |row|
      begin
        if row["email_date"].blank?
          puts "Date is missing. Skipping #{row["msg_id"]}"
          nil
        elsif row["in_reply_to"].blank?
          map_first_post(row)
        else
          map_reply(row)
        end
      rescue => e
        puts "Failed to map post for #{row["msg_id"]}", e, e.backtrace.join("\n")
        # Explicit nil rather than relying on the return value of puts.
        nil
      end
    end
  end
end
|
|
|
|
|
|
|
|
# Builds the attributes shared by first posts and replies.
#
# The author is looked up by the sender's email address. When no imported
# user matches, the post is attributed to Discourse::SYSTEM_USER_ID.
# After the post is created, the +post_create_action+ proc records the
# matching incoming email.
#
# @param row message row; reads "from_email", "msg_id", "email_date" and
#   "raw_message" here, plus whatever format_raw reads
# @return [Hash] post attributes
def map_post(row)
  user_id = user_id_from_imported_user_id(row["from_email"]) || Discourse::SYSTEM_USER_ID

  {
    id: row["msg_id"],
    user_id: user_id,
    created_at: to_time(row["email_date"]),
    raw: format_raw(row, user_id),
    raw_email: row["raw_message"],
    via_email: true,
    post_create_action: proc { |post| create_incoming_email(post, row) },
  }
end
|
|
|
|
|
2017-12-13 15:58:13 -05:00
|
|
|
# Builds the raw post text for a message.
#
# Starts from the row's body (empty string when missing). When the message
# has attachments, the raw email is parsed again and the attachments are
# added to the body on behalf of the given user. When the row carries
# elided content and the +show_trimmed_content+ setting is on, the elided
# HTML is appended.
#
# @param row message row; reads "body", "elided", "attachment_count" and
#   "raw_message"
# @param user_id id of the user the attachments are added for
# @return [String] the raw post text
def format_raw(row, user_id)
  body = row["body"] || ""
  elided = row["elided"]

  # to_i treats a missing attachment count as zero instead of raising
  # NoMethodError on nil.
  if row["attachment_count"].to_i.positive?
    receiver = Email::Receiver.new(row["raw_message"])
    user = User.find(user_id)
    body = receiver.add_attachments(body, user)
  end

  if elided.present? && @settings.show_trimmed_content
    body = "#{body}#{Email::Receiver.elided_html(elided)}"
  end

  body
end
|
|
|
|
|
2017-05-26 16:26:18 -04:00
|
|
|
# Maps a message that starts a topic.
#
# Tags and prefixes are stripped from a copy of the subject (the copy is
# needed because remove_tags! mutates its argument). The cleaned subject,
# limited to 255 characters, becomes the topic title. Tags are only
# attached when at least one was found.
#
# @param row message row; reads "subject" and "category" here
# @return [Hash] post attributes from map_post plus :category, :title and
#   optionally :tags
def map_first_post(row)
  subject = row["subject"].dup
  tags = remove_tags!(subject)

  mapped = map_post(row)
  mapped[:category] = category_id_from_imported_category_id(row["category"])
  mapped[:title] = subject.strip[0...255]
  mapped[:tags] = tags if tags.present?
  mapped
end
|
|
|
|
|
|
|
|
# Maps a message that replies to an already imported message.
#
# The parent is resolved from the "in_reply_to" message id. When it cannot
# be found, the message is skipped with a log line.
#
# @param row message row; reads "in_reply_to", "msg_id" and "subject" here
# @return [Hash, nil] post attributes from map_post plus :topic_id, or nil
#   when the parent message is unknown
def map_reply(row)
  parent = @lookup.topic_lookup_from_imported_post_id(row["in_reply_to"])

  if parent.blank?
    # to_s keeps the skip message intact when the subject is missing
    # instead of raising NoMethodError on nil.
    puts "Parent message #{row["in_reply_to"]} doesn't exist. Skipping #{row["msg_id"]}: #{row["subject"].to_s[0..40]}"
    return nil
  end

  mapped = map_post(row)
  mapped[:topic_id] = parent[:topic_id]
  mapped
end
|
|
|
|
|
2020-03-07 19:20:02 -05:00
|
|
|
# Strips configured tags and subject prefixes from +subject+ in place and
# returns the names of the tags that were found.
#
# Prefixes are removed first. Then every configured tag regex is tried
# once per pass. Whenever a pass shortens the subject, prefixes are removed
# again and another pass runs, so tags and prefixes may be interleaved in
# any order. A tag whose name is blank is removed from the subject but not
# reported.
#
# @param subject [String] mutable subject line; modified in place
# @return [Array] unique tag names in order of first appearance
def remove_tags!(subject)
  tag_names = []
  remove_prefixes!(subject)

  loop do
    old_length = subject.length

    @settings.tags.each do |tag|
      tag_names << tag[:name] if subject.sub!(tag[:regex], "") && tag[:name].present?
    end

    # Nothing was removed in this pass, so another one cannot match either.
    break if subject.length == old_length

    remove_prefixes!(subject)
  end

  tag_names.uniq
end
|
|
|
|
|
|
|
|
# Removes every leading match of the configured subject prefix regex from
# +subject+ in place, stripping surrounding whitespace after each removal.
#
# @param subject [String] mutable subject line; modified in place
# @return [nil]
def remove_prefixes!(subject)
  # There could be multiple prefixes...
  loop do
    length_before = subject.length
    break unless subject.sub!(@settings.subject_prefix_regex, "")

    subject.strip!

    # sub! also reports success for a match that removed nothing (a regex
    # that can match the empty string); stop instead of looping forever.
    break if subject.length == length_before
  end
end
|
|
|
|
|
2017-10-19 08:27:40 -04:00
|
|
|
# Records the original email for a freshly created post.
#
# @param post created post; its user_id, topic_id and id are stored
# @param row message row; reads "msg_id", "raw_message", "subject" and
#   "from_email"
# @return whatever IncomingEmail.create returns
def create_incoming_email(post, row)
  IncomingEmail.create(
    message_id: row["msg_id"],
    raw: row["raw_message"],
    subject: row["subject"],
    from_address: row["from_email"],
    user_id: post.user_id,
    topic_id: post.topic_id,
    post_id: post.id,
  )
end
|
|
|
|
|
2018-08-23 03:46:25 -04:00
|
|
|
# Converts a stored timestamp into a time in the configured time zone.
#
# @param timestamp value accepted by Time.zone.at, or nil
# @return the converted time, or nil when no timestamp is given
def to_time(timestamp)
  Time.zone.at(timestamp) if timestamp
end
|
|
|
|
end
|
|
|
|
end
|