FEATURE: Remove attachments and truncate raw field for incoming emails (#8253)

Adds the settings: 

raw_email_max_length, raw_rejected_email_max_length, delete_rejected_email_after_days. 

These settings control retention of the "raw" email logs.

raw_email_max_length ensures that if we receive a huge incoming email, we truncate it and remove uploads (attachments) from the raw log.

raw_rejected_email_max_length introduces an even more aggressive truncation for rejected incoming mail. 

delete_rejected_email_after_days controls how many days rejected emails are kept before they are deleted (default: 90).
This commit is contained in:
Krzysztof Kotlarek 2019-10-30 16:54:35 +11:00 committed by Sam
parent fcb1ca52f9
commit c32bd8ae48
9 changed files with 124 additions and 3 deletions

View File

@ -13,6 +13,7 @@ module Jobs
Draft.cleanup!
UserAuthToken.cleanup!
Upload.reset_unknown_extensions!
Email::Cleaner.delete_rejected!
end
end
end

View File

@ -1856,6 +1856,9 @@ en:
private_email: "Don't include content from posts or topics in email title or email body. NOTE: also disables digest emails."
email_total_attachment_size_limit_kb: "Max total size of files attached to outgoing emails. Set to 0 to disable sending of attachments."
post_excerpts_in_emails: "In notification emails, always send excerpts instead of full posts"
raw_email_max_length: "How many characters should be stored for incoming email."
raw_rejected_email_max_length: "How many characters should be stored for rejected incoming email."
delete_rejected_email_after_days: "Delete rejected emails older than (n) days."
manual_polling_enabled: "Push emails using the API for email replies."
pop3_polling_enabled: "Poll via POP3 for email replies."

View File

@ -1055,6 +1055,9 @@ email:
default: 0
max: 51200
post_excerpts_in_emails: false
raw_email_max_length: 220000
raw_rejected_email_max_length: 4000
delete_rejected_email_after_days: 90
files:
max_image_size_kb:

42
lib/email/cleaner.rb Normal file
View File

@ -0,0 +1,42 @@
# frozen_string_literal: true
module Email
  # Produces a reduced copy of a raw email for storage: attachments can be
  # dropped, each part body can be cut to a site-setting-controlled length,
  # and NUL bytes are always stripped from the serialized result.
  class Cleaner
    # @param mail the raw email source; it is parsed with Mail.new
    # @param remove_attachments [Boolean] strip attachments when true
    # @param truncate [Boolean] shorten every part body when true
    # @param rejected [Boolean] use the rejected-email length limit
    #   (SiteSetting.raw_rejected_email_max_length) instead of
    #   SiteSetting.raw_email_max_length
    def initialize(mail, remove_attachments: true, truncate: true, rejected: false)
      @rejected = rejected
      @truncate = truncate
      @remove_attachments = remove_attachments
      @mail = Mail.new(mail)
      @mail.charset = 'UTF-8'
    end

    # Applies the configured clean-up steps and serializes the message.
    #
    # @return [String] the cleaned message text without NUL bytes
    def execute
      strip_attachments
      shorten_bodies
      without_null_bytes(@mail.to_s)
    end

    # Deletes IncomingEmail rows that carry a rejection message and were
    # created before the delete_rejected_email_after_days cutoff.
    def self.delete_rejected!
      cutoff = SiteSetting.delete_rejected_email_after_days.days.ago
      IncomingEmail.delete_by('rejection_message IS NOT NULL AND created_at < ?', cutoff)
    end

    private

    # Drops attachments from the parsed message unless that step is disabled.
    def strip_attachments
      return unless @remove_attachments

      @mail.without_attachments!
    end

    # Replaces each body with its decoded text cut to the active limit;
    # nothing is appended in place of the removed text.
    def shorten_bodies
      return unless @truncate

      sections.each do |section|
        section.body = section.body.decoded.truncate(max_body_length, omission: '')
      end
    end

    # A multipart message is handled part by part; any other message is
    # treated as a single section.
    def sections
      return @mail.parts if @mail.multipart?

      [@mail]
    end

    # Length limit to apply, depending on whether the email was rejected.
    def max_body_length
      if @rejected
        SiteSetting.raw_rejected_email_max_length
      else
        SiteSetting.raw_email_max_length
      end
    end

    # Returns the text with every NUL byte removed.
    def without_null_bytes(text)
      text.gsub("\x00", "")
    end
  end
end

View File

@ -128,7 +128,12 @@ module Email
end
# Stores the rejection reason on an incoming email record and, in the same
# write, replaces its raw source with a cleaned copy produced by
# Email::Cleaner in rejected mode.
#
# The record is written exactly once; a separate preliminary write of
# rejection_message alone would be redundant because the combined update
# already sets it.
#
# @param incoming_email the record to update; nothing happens when it is nil
# @param message the rejection message to store
# @return the result of update!, or nil when there is no record
def set_incoming_email_rejection_message(incoming_email, message)
  return unless incoming_email

  incoming_email.update!(
    rejection_message: message,
    raw: Email::Cleaner.new(incoming_email.raw, rejected: true).execute
  )
end
def log_email_process_failure(mail_string, exception)

View File

@ -106,7 +106,7 @@ module Email
def create_incoming_email
IncomingEmail.create(
message_id: @message_id,
raw: @raw_email,
raw: Email::Cleaner.new(@raw_email).execute,
subject: subject,
from_address: @from_email,
to_addresses: @mail.to&.map(&:downcase)&.join(";"),
@ -1237,5 +1237,4 @@ module Email
end
end
end
end

View File

@ -0,0 +1,14 @@
# frozen_string_literal: true
desc "removes attachments and truncates long raw message"
task "incoming_emails:truncate_long" => :environment do
  IncomingEmail.find_each do |record|
    was_rejected = record.rejection_message.present?
    cleaned_raw = Email::Cleaner.new(record.raw, rejected: was_rejected).execute

    # Stored raw emails use \n line separators while the mail gem serializes
    # with \r\n, so compare against the re-serialized original. Records whose
    # content did not actually change are skipped instead of rewritten.
    next if cleaned_raw == Mail.new(record.raw).to_s

    record.update(raw: cleaned_raw)
  end
end

View File

@ -0,0 +1,29 @@
# frozen_string_literal: true
require "rails_helper"
require "email/receiver"
describe Email::Cleaner do
  # Raw fixture shared by every example: a multipart email with a text attachment.
  let(:raw_email) { email(:attached_txt_file) }

  # Builds the serialized message expected from the cleaner: the fixture's
  # headers and a single text/plain part holding +body_text+, with no
  # attachment part.
  def cleaned_message(body_text)
    "Return-Path: <discourse@bar.com>\r\nDate: Sat, 30 Jan 2016 01:10:11 +0100\r\nFrom: Foo Bar <discourse@bar.com>\r\nTo: reply+4f97315cc828096c9cb34c6f1a0d6fe8@bar.com\r\nMessage-ID: <38@foo.bar.mail>\r\nMime-Version: 1.0\r\nContent-Type: multipart/mixed;\r\n boundary=\"--==_mimepart_56abff5d49749_ddf83fca6d033a28548ad\";\r\n charset=UTF-8\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n----==_mimepart_56abff5d49749_ddf83fca6d033a28548ad\r\nContent-Type: text/plain;\r\n charset=UTF-8\r\nContent-Transfer-Encoding: 7bit\r\n\r\n#{body_text}\r\n----==_mimepart_56abff5d49749_ddf83fca6d033a28548ad--\r\n"
  end

  it 'removes attachments from raw message' do
    cleaned = described_class.new(raw_email).execute

    expect(cleaned).to eq(cleaned_message("Please find some text file attached."))
  end

  it 'truncates message' do
    SiteSetting.raw_email_max_length = 10

    cleaned = described_class.new(raw_email).execute

    expect(cleaned).to eq(cleaned_message("Please fin"))
  end

  it 'truncates rejected message' do
    SiteSetting.raw_rejected_email_max_length = 10

    cleaned = described_class.new(raw_email, rejected: true).execute

    expect(cleaned).to eq(cleaned_message("Please fin"))
  end
end

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
require 'rails_helper'
RSpec.describe "incoming emails tasks" do
  # Looked up lazily so it resolves after the tasks are reloaded below.
  let(:truncate_task) { Rake::Task['incoming_emails:truncate_long'] }

  # Start every example from freshly loaded task definitions.
  before do
    Rake::Task.clear
    Discourse::Application.load_tasks
  end

  describe 'email with attachment' do
    fab!(:incoming_email) { Fabricate(:incoming_email, raw: email(:attached_txt_file)) }

    it 'updates record' do
      expect { truncate_task.invoke }.to change { incoming_email.reload.raw }
    end
  end

  describe 'short email without attachment' do
    fab!(:incoming_email) { Fabricate(:incoming_email, raw: email(:html_reply)) }

    it 'does not update record' do
      expect { truncate_task.invoke }.not_to change { incoming_email.reload.raw }
    end
  end
end