FIX: improve mailman email parsing (#21627)

https://meta.discourse.org/t/improving-mailman-email-parsing/253041

When mirroring a public mailling list which uses mailman, there were some cases where the incoming email was not associated to the proper user.

As it happens, for various (undertermined) reasons, the email from the sender is often not in the `From` header but can be in any of the following headers: `Reply-To`, `CC`, `X-Original-From`, `X-MailFrom`.

It might be in other headers as well, but those were the ones we found the most reliable.
This commit is contained in:
Régis Hanol 2023-05-19 10:33:48 +02:00 committed by GitHub
parent f62904715f
commit db9d998de3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 226 additions and 101 deletions

View File

@ -107,7 +107,7 @@ module Email
Email::Validator.ensure_valid!(@mail)
@from_email, @from_display_name = parse_from_field
@from_email, @from_display_name = parse_from_field(@mail)
@from_user = User.find_by_email(@from_email)
@incoming_email = create_incoming_email
@ -660,14 +660,15 @@ module Email
reply.split(reply_above_line_regex)[0]
end
def parse_from_field(mail = nil)
mail ||= @mail
def parse_from_field(mail, process_forwarded_emails: true)
if email_log.present?
email = email_log.to_address || email_log.user&.email
return email, email_log.user&.username
elsif mail.bounced?
Array.wrap(mail.final_recipient).each { |from| return extract_from_address_and_name(from) }
# "Final-Recipient" has a specific format (<name> ; <address>)
# cf. https://www.ietf.org/rfc/rfc2298.html#section-3.2.4
address_type, generic_address = mail.final_recipient.to_s.split(";").map { _1.to_s.strip }
return generic_address, nil if generic_address.include?("@") && address_type == "rfc822"
end
return unless mail[:from]
@ -675,87 +676,81 @@ module Email
# For forwarded emails, where the from address matches a group incoming
# email, we want to use the from address of the original email sender,
# which we can extract from embedded_email_raw.
if has_been_forwarded?
if mail[:from].to_s =~ group_incoming_emails_regex && embedded_email[:from].errors.blank?
from_address, from_display_name = extract_from_fields_from_header(embedded_email, :from)
return from_address, from_display_name if from_address
if process_forwarded_emails && has_been_forwarded?
if mail[:from].to_s =~ group_incoming_emails_regex
if embedded_email && embedded_email[:from].errors.blank?
from_address, from_display_name =
Email::Receiver.extract_email_address_and_name(embedded_email[:from])
return from_address, from_display_name if from_address
end
end
end
# extract proper sender when using mailman mailing list
if mail[:x_mailman_version].present?
address, name = Email::Receiver.extract_email_address_and_name_from_mailman(mail)
return address, name if address
end
# For now we are only using the Reply-To header if the email has
# been forwarded via Google Groups, which is why we are checking the
# X-Original-From header too. In future we may want to use the Reply-To
# header in more cases.
if mail["X-Original-From"].present?
if mail[:reply_to] && mail[:reply_to].errors.blank?
from_address, from_display_name =
extract_from_fields_from_header(
mail,
:reply_to,
comparison_headers: ["X-Original-From"],
)
return from_address, from_display_name if from_address
end
if mail[:x_original_from].present? && mail[:reply_to].present?
original_from_address, _ =
Email::Receiver.extract_email_address_and_name(mail[:x_original_from])
reply_to_address, reply_to_name =
Email::Receiver.extract_email_address_and_name(mail[:reply_to])
return reply_to_address, reply_to_name if original_from_address == reply_to_address
end
if mail[:from].errors.blank?
from_address, from_display_name = extract_from_fields_from_header(mail, :from)
return from_address, from_display_name if from_address
end
return extract_from_address_and_name(mail.from) if mail.from.is_a? String
if mail.from.is_a? Mail::AddressContainer
mail.from.each do |from|
from_address, from_display_name = extract_from_address_and_name(from)
return from_address, from_display_name if from_address
end
end
nil
Email::Receiver.extract_email_address_and_name(mail[:from])
rescue StandardError
nil
end
def extract_from_fields_from_header(mail_object, header, comparison_headers: [])
mail_object[header].each do |address_field|
from_address = address_field.address
from_display_name = address_field.display_name&.to_s
def self.extract_email_address_and_name_from_mailman(mail)
list_address, _ = Email::Receiver.extract_email_address_and_name(mail[:list_post])
list_address, _ =
Email::Receiver.extract_email_address_and_name(mail[:x_beenthere]) if list_address.blank?
comparison_failed = false
comparison_headers.each do |comparison_header|
comparison_header_address = mail_object[comparison_header].to_s[/<([^>]+)>/, 1]
if comparison_header_address != from_address
comparison_failed = true
break
end
return if list_address.blank?
# the CC header often includes the name of the sender
address_to_name = mail[:cc]&.element&.addresses&.to_h { [_1.address, _1.name] } || {}
%i[from reply_to x_mailfrom x_original_from].each do |header|
next if mail[header].blank?
email, name = Email::Receiver.extract_email_address_and_name(mail[header])
if email.present? && email != list_address
return email, name.presence || address_to_name[email]
end
next if comparison_failed
next if !from_address&.include?("@")
return from_address&.downcase, from_display_name&.strip
end
[nil, nil]
end
def extract_from_address_and_name(value)
if value[";"]
from_display_name, from_address = value.split(";")
return from_address&.strip&.downcase, from_display_name&.strip
def self.extract_email_address_and_name(value)
begin
# ensure the email header value is a string
value = value.to_s
# in embedded emails, converts [mailto:foo@bar.com] to <foo@bar.com>
value = value.gsub(/\[mailto:([^\[\]]+?)\]/, "<\\1>")
# 'mailto:' suffix isn't supported by Mail::Address parsing
value = value.gsub("mailto:", "")
# parse the email header value
parsed = Mail::Address.new(value)
# extract the email address and name
mail = parsed.address.to_s.downcase.strip
name = parsed.name.to_s.strip
# ensure the email address is "valid"
if mail.include?("@")
# remove surrounding quotes from the name
name = name[1...-1] if name.size > 2 && name[/\A(['"]).+(\1)\z/]
# return the email address and name
[mail, name]
end
rescue Mail::Field::ParseError, Mail::Field::IncompleteParseError => e
# something went wrong parsing the email header value, return nil
end
if value[/<[^>]+>/]
from_address = value[/<([^>]+)>/, 1]
from_display_name = value[/\A([^<]+)/, 1]
end
if (from_address.blank? || !from_address["@"]) && value[/\[mailto:[^\]]+\]/]
from_address = value[/\[mailto:([^\]]+)\]/, 1]
from_display_name = value[/\A([^\[]+)/, 1]
end
[from_address&.downcase, from_display_name&.strip]
end
def subject
@ -1089,24 +1084,28 @@ module Email
end
def forwarded_email_create_replies(destination, user)
embedded = Mail.new(embedded_email_raw)
email, display_name = parse_from_field(embedded)
forwarded_by_address, forwarded_by_name =
Email::Receiver.extract_email_address_and_name(@mail[:from])
if forwarded_by_address && forwarded_by_name
@forwarded_by_user = stage_sender_user(forwarded_by_address, forwarded_by_name)
end
return false if email.blank? || !email["@"]
email_address, display_name =
parse_from_field(embedded_email, process_forwarded_emails: false)
return false if email_address.blank? || !email_address.include?("@")
post =
forwarded_email_create_topic(
destination: destination,
user: user,
raw: try_to_encode(embedded.decoded, "UTF-8").presence || embedded.to_s,
title: embedded.subject.presence || subject,
date: embedded.date,
embedded_user: lambda { find_or_create_user(email, display_name) },
raw: try_to_encode(embedded_email.decoded, "UTF-8").presence || embedded_email.to_s,
title: embedded_email.subject.presence || subject,
date: embedded_email.date,
embedded_user: lambda { find_or_create_user(email_address, display_name) },
)
return false unless post
if post.topic
@ -1143,25 +1142,12 @@ module Email
true
end
def forwarded_by_sender
@forwarded_by_sender ||= extract_from_fields_from_header(@mail, :from)
end
def forwarded_by_address
@forwarded_by_address ||= forwarded_by_sender&.first
end
def forwarded_by_name
@forwarded_by_name ||= forwarded_by_sender&.first
end
def forwarded_email_quote_forwarded(destination, user)
embedded = embedded_email_raw
raw = <<~MD
#{@before_embedded}
[quote]
#{PlainTextToMarkdown.new(embedded).to_markdown}
#{PlainTextToMarkdown.new(@embedded_email_raw).to_markdown}
[/quote]
MD
@ -1528,7 +1514,7 @@ module Email
address_field.decoded
email = address_field.address.downcase
display_name = address_field.display_name.try(:to_s)
next unless email["@"]
next if !email.include?("@")
if should_invite?(email)
user = User.find_by_email(email)

View File

@ -207,3 +207,56 @@ task "emails:test", [:email] => [:environment] do |_, args|
Consider changing it to 'no' before performing any further troubleshooting.
TEXT
end
desc "run this to fix users associated to emails mirrored from a mailman mailing list"
task "emails:fix_mailman_users" => :environment do
if !SiteSetting.enable_staged_users
puts "Please enable staged users first"
exit 1
end
def find_or_create_user(email, name)
user = nil
User.transaction do
unless user = User.find_by_email(email)
username = UserNameSuggester.sanitize_username(name) if name.present?
username = UserNameSuggester.suggest(username.presence || email)
name = name.presence || User.suggest_name(email)
begin
user = User.create!(email: email, username: username, name: name, staged: true)
rescue PG::UniqueViolation, ActiveRecord::RecordNotUnique, ActiveRecord::RecordInvalid
end
end
end
user
end
IncomingEmail
.includes(:user, :post)
.where("raw LIKE '%X-Mailman-Version: %'")
.find_each do |ie|
next unless ie.post.present?
mail = Mail.new(ie.raw)
email, name = Email::Receiver.extract_email_address_and_name_from_mailman(mail)
if email.blank? || email == ie.user.email
putc "."
elsif new_owner = find_or_create_user(email, name)
PostOwnerChanger.new(
post_ids: [ie.post_id],
topic_id: ie.post.topic_id,
new_owner: new_owner,
acting_user: Discourse.system_user,
skip_revision: true,
).change_owner!
putc "#"
else
putc "X"
end
end
nil
end

19
spec/fixtures/emails/mailman_1.eml vendored Normal file
View File

@ -0,0 +1,19 @@
Date: Tue, 16 May 2023 20:50:54 -0700
From: Some One <some@one.com>
Reply-To: Example users <list@ml.example.com>
To: list@example.com
Message-ID: <4460a72d7764ad03b5cecd65fe4c2fbd@foo.com>
Subject: library 1.2.3 released!
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-MailFrom: some@one.com
X-Mailman-Version: 2.3.4
Precedence: list
List-Id: Example users <list.ml.example.com>
List-Owner: <mailto:list-owner@ml.example.com>
List-Post: <mailto:list@ml.example.com>
List-Subscribe: <mailto:list-join@ml.example.com>
List-Unsubscribe: <mailto:list-leave@ml.example.com>
library version 1.2.3 has been released!

15
spec/fixtures/emails/mailman_2.eml vendored Normal file
View File

@ -0,0 +1,15 @@
Date: Tue, 16 May 2023 20:50:54 -0700
From: Some One via example <list@ml.example.com>
Reply-To: some@one.com
To: list@example.com
Message-ID: <23e86ebe4d9f211967e48d284449c856@foo.com>
Subject: library 1.2.3 released!
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-MailFrom: some@one.com
X-Mailman-Version: 1.2.3
X-BeenThere: list@ml.example.com
Precedence: list
library version 1.2.3 has been released!

15
spec/fixtures/emails/mailman_3.eml vendored Normal file
View File

@ -0,0 +1,15 @@
Date: Tue, 16 May 2023 20:50:54 -0700
From: Some One via example <list@ml.example.com>
To: list@example.com
CC: Some One <some@one.com>
Message-ID: <5a0d382572c511c24daf1558c02f6a19@foo.com>
Subject: library 1.2.3 released!
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-MailFrom: some@one.com
X-Mailman-Version: 2.2.2
X-BeenThere: list@ml.example.com
Precedence: list
library version 1.2.3 has been released!

14
spec/fixtures/emails/mailman_4.eml vendored Normal file
View File

@ -0,0 +1,14 @@
Date: Tue, 16 May 2023 20:50:54 -0700
From: Some One via example <list@ml.example.com>
To: list@example.com
Message-ID: <8df45354b9ca21a868023b2dd3296a8d@foo.com>
Subject: library 1.2.3 released!
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-Original-From: some@one.com
X-Mailman-Version: 1.2.3
Precedence: list
List-Post: <mailto:list@ml.example.com>
library version 1.2.3 has been released!

View File

@ -7,7 +7,7 @@ Message-ID: <3848c3m98r439c348mc349@test.mailinglist.com>
Mime-Version: 1.0
Content-Type: text/plain
Content-Transfer-Encoding: 7bit
X-Original-From: Marston, John <johnmarston@reddeadtest.com>
X-Original-From: "Marston, John" <johnmarston@reddeadtest.com>
Precedence: list
Mailing-list: list westernsupport@test.mailinglist.com; contact
westernsupport+owners@test.mailinglist.com

View File

@ -218,14 +218,6 @@ RSpec.describe Email::Receiver do
expect(IncomingEmail.last.error).to eq("RuntimeError")
end
it "matches the correct user" do
user = Fabricate(:user)
email_log = Fabricate(:email_log, to_address: user.email, user: user, bounce_key: nil)
email, name = Email::Receiver.new(email(:existing_user)).parse_from_field
expect(email).to eq("existing@bar.com")
expect(name).to eq("Foo Bar")
end
it "strips null bytes from the subject" do
expect do process(:null_byte_in_subject) end.to raise_error(
Email::Receiver::BadDestinationAddress,
@ -512,9 +504,8 @@ RSpec.describe Email::Receiver do
)
end
it "handles invalid from header" do
expect { process(:invalid_from_1) }.to change { topic.posts.count }
expect(topic.posts.last.raw).to eq("This email was sent with an invalid from header field.")
it "raises a NoSenderDetectedError when the From header can't be parsed" do
expect { process(:invalid_from_1) }.to raise_error(Email::Receiver::NoSenderDetectedError)
end
it "raises a NoSenderDetectedError when the From header doesn't contain an email address" do
@ -1996,6 +1987,38 @@ RSpec.describe Email::Receiver do
end
end
describe "mailman mirror" do
fab!(:category) { Fabricate(:mailinglist_mirror_category) }
it "uses 'from' email address" do
expect { process(:mailman_1) }.to change { Topic.count }
user = Topic.last.user
expect(user.email).to eq("some@one.com")
expect(user.name).to eq("Some One")
end
it "uses 'reply-to' email address" do
expect { process(:mailman_2) }.to change { Topic.count }
user = Topic.last.user
expect(user.email).to eq("some@one.com")
expect(user.name).to eq("Some")
end
it "uses 'x-mailfrom' email address and name from CC" do
expect { process(:mailman_3) }.to change { Topic.count }
user = Topic.last.user
expect(user.email).to eq("some@one.com")
expect(user.name).to eq("Some One")
end
it "uses 'x-original-from' email address" do
expect { process(:mailman_4) }.to change { Topic.count }
user = Topic.last.user
expect(user.email).to eq("some@one.com")
expect(user.name).to eq("Some")
end
end
describe "mailing list mirror" do
fab!(:category) { Fabricate(:mailinglist_mirror_category) }