FIX: improve mailman email parsing (#21627)

https://meta.discourse.org/t/improving-mailman-email-parsing/253041

When mirroring a public mailling list which uses mailman, there were some cases where the incoming email was not associated to the proper user.

As it happens, for various (undertermined) reasons, the email from the sender is often not in the `From` header but can be in any of the following headers: `Reply-To`, `CC`, `X-Original-From`, `X-MailFrom`.

It might be in other headers as well, but those were the ones we found the most reliable.
This commit is contained in:
Régis Hanol 2023-05-19 10:33:48 +02:00 committed by GitHub
parent f62904715f
commit db9d998de3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 226 additions and 101 deletions

View File

@ -107,7 +107,7 @@ module Email
Email::Validator.ensure_valid!(@mail) Email::Validator.ensure_valid!(@mail)
@from_email, @from_display_name = parse_from_field @from_email, @from_display_name = parse_from_field(@mail)
@from_user = User.find_by_email(@from_email) @from_user = User.find_by_email(@from_email)
@incoming_email = create_incoming_email @incoming_email = create_incoming_email
@ -660,14 +660,15 @@ module Email
reply.split(reply_above_line_regex)[0] reply.split(reply_above_line_regex)[0]
end end
def parse_from_field(mail = nil) def parse_from_field(mail, process_forwarded_emails: true)
mail ||= @mail
if email_log.present? if email_log.present?
email = email_log.to_address || email_log.user&.email email = email_log.to_address || email_log.user&.email
return email, email_log.user&.username return email, email_log.user&.username
elsif mail.bounced? elsif mail.bounced?
Array.wrap(mail.final_recipient).each { |from| return extract_from_address_and_name(from) } # "Final-Recipient" has a specific format (<name> ; <address>)
# cf. https://www.ietf.org/rfc/rfc2298.html#section-3.2.4
address_type, generic_address = mail.final_recipient.to_s.split(";").map { _1.to_s.strip }
return generic_address, nil if generic_address.include?("@") && address_type == "rfc822"
end end
return unless mail[:from] return unless mail[:from]
@ -675,87 +676,81 @@ module Email
# For forwarded emails, where the from address matches a group incoming # For forwarded emails, where the from address matches a group incoming
# email, we want to use the from address of the original email sender, # email, we want to use the from address of the original email sender,
# which we can extract from embedded_email_raw. # which we can extract from embedded_email_raw.
if has_been_forwarded? if process_forwarded_emails && has_been_forwarded?
if mail[:from].to_s =~ group_incoming_emails_regex && embedded_email[:from].errors.blank? if mail[:from].to_s =~ group_incoming_emails_regex
from_address, from_display_name = extract_from_fields_from_header(embedded_email, :from) if embedded_email && embedded_email[:from].errors.blank?
return from_address, from_display_name if from_address from_address, from_display_name =
Email::Receiver.extract_email_address_and_name(embedded_email[:from])
return from_address, from_display_name if from_address
end
end end
end end
# extract proper sender when using mailman mailing list
if mail[:x_mailman_version].present?
address, name = Email::Receiver.extract_email_address_and_name_from_mailman(mail)
return address, name if address
end
# For now we are only using the Reply-To header if the email has # For now we are only using the Reply-To header if the email has
# been forwarded via Google Groups, which is why we are checking the # been forwarded via Google Groups, which is why we are checking the
# X-Original-From header too. In future we may want to use the Reply-To # X-Original-From header too. In future we may want to use the Reply-To
# header in more cases. # header in more cases.
if mail["X-Original-From"].present? if mail[:x_original_from].present? && mail[:reply_to].present?
if mail[:reply_to] && mail[:reply_to].errors.blank? original_from_address, _ =
from_address, from_display_name = Email::Receiver.extract_email_address_and_name(mail[:x_original_from])
extract_from_fields_from_header( reply_to_address, reply_to_name =
mail, Email::Receiver.extract_email_address_and_name(mail[:reply_to])
:reply_to, return reply_to_address, reply_to_name if original_from_address == reply_to_address
comparison_headers: ["X-Original-From"],
)
return from_address, from_display_name if from_address
end
end end
if mail[:from].errors.blank? Email::Receiver.extract_email_address_and_name(mail[:from])
from_address, from_display_name = extract_from_fields_from_header(mail, :from)
return from_address, from_display_name if from_address
end
return extract_from_address_and_name(mail.from) if mail.from.is_a? String
if mail.from.is_a? Mail::AddressContainer
mail.from.each do |from|
from_address, from_display_name = extract_from_address_and_name(from)
return from_address, from_display_name if from_address
end
end
nil
rescue StandardError rescue StandardError
nil nil
end end
def extract_from_fields_from_header(mail_object, header, comparison_headers: []) def self.extract_email_address_and_name_from_mailman(mail)
mail_object[header].each do |address_field| list_address, _ = Email::Receiver.extract_email_address_and_name(mail[:list_post])
from_address = address_field.address list_address, _ =
from_display_name = address_field.display_name&.to_s Email::Receiver.extract_email_address_and_name(mail[:x_beenthere]) if list_address.blank?
comparison_failed = false return if list_address.blank?
comparison_headers.each do |comparison_header|
comparison_header_address = mail_object[comparison_header].to_s[/<([^>]+)>/, 1] # the CC header often includes the name of the sender
if comparison_header_address != from_address address_to_name = mail[:cc]&.element&.addresses&.to_h { [_1.address, _1.name] } || {}
comparison_failed = true
break %i[from reply_to x_mailfrom x_original_from].each do |header|
end next if mail[header].blank?
email, name = Email::Receiver.extract_email_address_and_name(mail[header])
if email.present? && email != list_address
return email, name.presence || address_to_name[email]
end end
next if comparison_failed
next if !from_address&.include?("@")
return from_address&.downcase, from_display_name&.strip
end end
[nil, nil]
end end
def extract_from_address_and_name(value) def self.extract_email_address_and_name(value)
if value[";"] begin
from_display_name, from_address = value.split(";") # ensure the email header value is a string
return from_address&.strip&.downcase, from_display_name&.strip value = value.to_s
# in embedded emails, converts [mailto:foo@bar.com] to <foo@bar.com>
value = value.gsub(/\[mailto:([^\[\]]+?)\]/, "<\\1>")
# 'mailto:' suffix isn't supported by Mail::Address parsing
value = value.gsub("mailto:", "")
# parse the email header value
parsed = Mail::Address.new(value)
# extract the email address and name
mail = parsed.address.to_s.downcase.strip
name = parsed.name.to_s.strip
# ensure the email address is "valid"
if mail.include?("@")
# remove surrounding quotes from the name
name = name[1...-1] if name.size > 2 && name[/\A(['"]).+(\1)\z/]
# return the email address and name
[mail, name]
end
rescue Mail::Field::ParseError, Mail::Field::IncompleteParseError => e
# something went wrong parsing the email header value, return nil
end end
if value[/<[^>]+>/]
from_address = value[/<([^>]+)>/, 1]
from_display_name = value[/\A([^<]+)/, 1]
end
if (from_address.blank? || !from_address["@"]) && value[/\[mailto:[^\]]+\]/]
from_address = value[/\[mailto:([^\]]+)\]/, 1]
from_display_name = value[/\A([^\[]+)/, 1]
end
[from_address&.downcase, from_display_name&.strip]
end end
def subject def subject
@ -1089,24 +1084,28 @@ module Email
end end
def forwarded_email_create_replies(destination, user) def forwarded_email_create_replies(destination, user)
embedded = Mail.new(embedded_email_raw) forwarded_by_address, forwarded_by_name =
email, display_name = parse_from_field(embedded) Email::Receiver.extract_email_address_and_name(@mail[:from])
if forwarded_by_address && forwarded_by_name if forwarded_by_address && forwarded_by_name
@forwarded_by_user = stage_sender_user(forwarded_by_address, forwarded_by_name) @forwarded_by_user = stage_sender_user(forwarded_by_address, forwarded_by_name)
end end
return false if email.blank? || !email["@"] email_address, display_name =
parse_from_field(embedded_email, process_forwarded_emails: false)
return false if email_address.blank? || !email_address.include?("@")
post = post =
forwarded_email_create_topic( forwarded_email_create_topic(
destination: destination, destination: destination,
user: user, user: user,
raw: try_to_encode(embedded.decoded, "UTF-8").presence || embedded.to_s, raw: try_to_encode(embedded_email.decoded, "UTF-8").presence || embedded_email.to_s,
title: embedded.subject.presence || subject, title: embedded_email.subject.presence || subject,
date: embedded.date, date: embedded_email.date,
embedded_user: lambda { find_or_create_user(email, display_name) }, embedded_user: lambda { find_or_create_user(email_address, display_name) },
) )
return false unless post return false unless post
if post.topic if post.topic
@ -1143,25 +1142,12 @@ module Email
true true
end end
def forwarded_by_sender
@forwarded_by_sender ||= extract_from_fields_from_header(@mail, :from)
end
def forwarded_by_address
@forwarded_by_address ||= forwarded_by_sender&.first
end
def forwarded_by_name
@forwarded_by_name ||= forwarded_by_sender&.first
end
def forwarded_email_quote_forwarded(destination, user) def forwarded_email_quote_forwarded(destination, user)
embedded = embedded_email_raw
raw = <<~MD raw = <<~MD
#{@before_embedded} #{@before_embedded}
[quote] [quote]
#{PlainTextToMarkdown.new(embedded).to_markdown} #{PlainTextToMarkdown.new(@embedded_email_raw).to_markdown}
[/quote] [/quote]
MD MD
@ -1528,7 +1514,7 @@ module Email
address_field.decoded address_field.decoded
email = address_field.address.downcase email = address_field.address.downcase
display_name = address_field.display_name.try(:to_s) display_name = address_field.display_name.try(:to_s)
next unless email["@"] next if !email.include?("@")
if should_invite?(email) if should_invite?(email)
user = User.find_by_email(email) user = User.find_by_email(email)

View File

@ -207,3 +207,56 @@ task "emails:test", [:email] => [:environment] do |_, args|
Consider changing it to 'no' before performing any further troubleshooting. Consider changing it to 'no' before performing any further troubleshooting.
TEXT TEXT
end end
desc "run this to fix users associated to emails mirrored from a mailman mailing list"
task "emails:fix_mailman_users" => :environment do
if !SiteSetting.enable_staged_users
puts "Please enable staged users first"
exit 1
end
def find_or_create_user(email, name)
user = nil
User.transaction do
unless user = User.find_by_email(email)
username = UserNameSuggester.sanitize_username(name) if name.present?
username = UserNameSuggester.suggest(username.presence || email)
name = name.presence || User.suggest_name(email)
begin
user = User.create!(email: email, username: username, name: name, staged: true)
rescue PG::UniqueViolation, ActiveRecord::RecordNotUnique, ActiveRecord::RecordInvalid
end
end
end
user
end
IncomingEmail
.includes(:user, :post)
.where("raw LIKE '%X-Mailman-Version: %'")
.find_each do |ie|
next unless ie.post.present?
mail = Mail.new(ie.raw)
email, name = Email::Receiver.extract_email_address_and_name_from_mailman(mail)
if email.blank? || email == ie.user.email
putc "."
elsif new_owner = find_or_create_user(email, name)
PostOwnerChanger.new(
post_ids: [ie.post_id],
topic_id: ie.post.topic_id,
new_owner: new_owner,
acting_user: Discourse.system_user,
skip_revision: true,
).change_owner!
putc "#"
else
putc "X"
end
end
nil
end

19
spec/fixtures/emails/mailman_1.eml vendored Normal file
View File

@ -0,0 +1,19 @@
Date: Tue, 16 May 2023 20:50:54 -0700
From: Some One <some@one.com>
Reply-To: Example users <list@ml.example.com>
To: list@example.com
Message-ID: <4460a72d7764ad03b5cecd65fe4c2fbd@foo.com>
Subject: library 1.2.3 released!
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-MailFrom: some@one.com
X-Mailman-Version: 2.3.4
Precedence: list
List-Id: Example users <list.ml.example.com>
List-Owner: <mailto:list-owner@ml.example.com>
List-Post: <mailto:list@ml.example.com>
List-Subscribe: <mailto:list-join@ml.example.com>
List-Unsubscribe: <mailto:list-leave@ml.example.com>
library version 1.2.3 has been released!

15
spec/fixtures/emails/mailman_2.eml vendored Normal file
View File

@ -0,0 +1,15 @@
Date: Tue, 16 May 2023 20:50:54 -0700
From: Some One via example <list@ml.example.com>
Reply-To: some@one.com
To: list@example.com
Message-ID: <23e86ebe4d9f211967e48d284449c856@foo.com>
Subject: library 1.2.3 released!
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-MailFrom: some@one.com
X-Mailman-Version: 1.2.3
X-BeenThere: list@ml.example.com
Precedence: list
library version 1.2.3 has been released!

15
spec/fixtures/emails/mailman_3.eml vendored Normal file
View File

@ -0,0 +1,15 @@
Date: Tue, 16 May 2023 20:50:54 -0700
From: Some One via example <list@ml.example.com>
To: list@example.com
CC: Some One <some@one.com>
Message-ID: <5a0d382572c511c24daf1558c02f6a19@foo.com>
Subject: library 1.2.3 released!
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-MailFrom: some@one.com
X-Mailman-Version: 2.2.2
X-BeenThere: list@ml.example.com
Precedence: list
library version 1.2.3 has been released!

14
spec/fixtures/emails/mailman_4.eml vendored Normal file
View File

@ -0,0 +1,14 @@
Date: Tue, 16 May 2023 20:50:54 -0700
From: Some One via example <list@ml.example.com>
To: list@example.com
Message-ID: <8df45354b9ca21a868023b2dd3296a8d@foo.com>
Subject: library 1.2.3 released!
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-Original-From: some@one.com
X-Mailman-Version: 1.2.3
Precedence: list
List-Post: <mailto:list@ml.example.com>
library version 1.2.3 has been released!

View File

@ -7,7 +7,7 @@ Message-ID: <3848c3m98r439c348mc349@test.mailinglist.com>
Mime-Version: 1.0 Mime-Version: 1.0
Content-Type: text/plain Content-Type: text/plain
Content-Transfer-Encoding: 7bit Content-Transfer-Encoding: 7bit
X-Original-From: Marston, John <johnmarston@reddeadtest.com> X-Original-From: "Marston, John" <johnmarston@reddeadtest.com>
Precedence: list Precedence: list
Mailing-list: list westernsupport@test.mailinglist.com; contact Mailing-list: list westernsupport@test.mailinglist.com; contact
westernsupport+owners@test.mailinglist.com westernsupport+owners@test.mailinglist.com

View File

@ -218,14 +218,6 @@ RSpec.describe Email::Receiver do
expect(IncomingEmail.last.error).to eq("RuntimeError") expect(IncomingEmail.last.error).to eq("RuntimeError")
end end
it "matches the correct user" do
user = Fabricate(:user)
email_log = Fabricate(:email_log, to_address: user.email, user: user, bounce_key: nil)
email, name = Email::Receiver.new(email(:existing_user)).parse_from_field
expect(email).to eq("existing@bar.com")
expect(name).to eq("Foo Bar")
end
it "strips null bytes from the subject" do it "strips null bytes from the subject" do
expect do process(:null_byte_in_subject) end.to raise_error( expect do process(:null_byte_in_subject) end.to raise_error(
Email::Receiver::BadDestinationAddress, Email::Receiver::BadDestinationAddress,
@ -512,9 +504,8 @@ RSpec.describe Email::Receiver do
) )
end end
it "handles invalid from header" do it "raises a NoSenderDetectedError when the From header can't be parsed" do
expect { process(:invalid_from_1) }.to change { topic.posts.count } expect { process(:invalid_from_1) }.to raise_error(Email::Receiver::NoSenderDetectedError)
expect(topic.posts.last.raw).to eq("This email was sent with an invalid from header field.")
end end
it "raises a NoSenderDetectedError when the From header doesn't contain an email address" do it "raises a NoSenderDetectedError when the From header doesn't contain an email address" do
@ -1996,6 +1987,38 @@ RSpec.describe Email::Receiver do
end end
end end
describe "mailman mirror" do
fab!(:category) { Fabricate(:mailinglist_mirror_category) }
it "uses 'from' email address" do
expect { process(:mailman_1) }.to change { Topic.count }
user = Topic.last.user
expect(user.email).to eq("some@one.com")
expect(user.name).to eq("Some One")
end
it "uses 'reply-to' email address" do
expect { process(:mailman_2) }.to change { Topic.count }
user = Topic.last.user
expect(user.email).to eq("some@one.com")
expect(user.name).to eq("Some")
end
it "uses 'x-mailfrom' email address and name from CC" do
expect { process(:mailman_3) }.to change { Topic.count }
user = Topic.last.user
expect(user.email).to eq("some@one.com")
expect(user.name).to eq("Some One")
end
it "uses 'x-original-from' email address" do
expect { process(:mailman_4) }.to change { Topic.count }
user = Topic.last.user
expect(user.email).to eq("some@one.com")
expect(user.name).to eq("Some")
end
end
describe "mailing list mirror" do describe "mailing list mirror" do
fab!(:category) { Fabricate(:mailinglist_mirror_category) } fab!(:category) { Fabricate(:mailinglist_mirror_category) }