FIX: improve mailman email parsing (#21627)
https://meta.discourse.org/t/improving-mailman-email-parsing/253041 When mirroring a public mailling list which uses mailman, there were some cases where the incoming email was not associated to the proper user. As it happens, for various (undertermined) reasons, the email from the sender is often not in the `From` header but can be in any of the following headers: `Reply-To`, `CC`, `X-Original-From`, `X-MailFrom`. It might be in other headers as well, but those were the ones we found the most reliable.
This commit is contained in:
parent
f62904715f
commit
db9d998de3
|
@ -107,7 +107,7 @@ module Email
|
|||
|
||||
Email::Validator.ensure_valid!(@mail)
|
||||
|
||||
@from_email, @from_display_name = parse_from_field
|
||||
@from_email, @from_display_name = parse_from_field(@mail)
|
||||
@from_user = User.find_by_email(@from_email)
|
||||
@incoming_email = create_incoming_email
|
||||
|
||||
|
@ -660,14 +660,15 @@ module Email
|
|||
reply.split(reply_above_line_regex)[0]
|
||||
end
|
||||
|
||||
def parse_from_field(mail = nil)
|
||||
mail ||= @mail
|
||||
|
||||
def parse_from_field(mail, process_forwarded_emails: true)
|
||||
if email_log.present?
|
||||
email = email_log.to_address || email_log.user&.email
|
||||
return email, email_log.user&.username
|
||||
elsif mail.bounced?
|
||||
Array.wrap(mail.final_recipient).each { |from| return extract_from_address_and_name(from) }
|
||||
# "Final-Recipient" has a specific format (<name> ; <address>)
|
||||
# cf. https://www.ietf.org/rfc/rfc2298.html#section-3.2.4
|
||||
address_type, generic_address = mail.final_recipient.to_s.split(";").map { _1.to_s.strip }
|
||||
return generic_address, nil if generic_address.include?("@") && address_type == "rfc822"
|
||||
end
|
||||
|
||||
return unless mail[:from]
|
||||
|
@ -675,87 +676,81 @@ module Email
|
|||
# For forwarded emails, where the from address matches a group incoming
|
||||
# email, we want to use the from address of the original email sender,
|
||||
# which we can extract from embedded_email_raw.
|
||||
if has_been_forwarded?
|
||||
if mail[:from].to_s =~ group_incoming_emails_regex && embedded_email[:from].errors.blank?
|
||||
from_address, from_display_name = extract_from_fields_from_header(embedded_email, :from)
|
||||
return from_address, from_display_name if from_address
|
||||
if process_forwarded_emails && has_been_forwarded?
|
||||
if mail[:from].to_s =~ group_incoming_emails_regex
|
||||
if embedded_email && embedded_email[:from].errors.blank?
|
||||
from_address, from_display_name =
|
||||
Email::Receiver.extract_email_address_and_name(embedded_email[:from])
|
||||
return from_address, from_display_name if from_address
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# extract proper sender when using mailman mailing list
|
||||
if mail[:x_mailman_version].present?
|
||||
address, name = Email::Receiver.extract_email_address_and_name_from_mailman(mail)
|
||||
return address, name if address
|
||||
end
|
||||
|
||||
# For now we are only using the Reply-To header if the email has
|
||||
# been forwarded via Google Groups, which is why we are checking the
|
||||
# X-Original-From header too. In future we may want to use the Reply-To
|
||||
# header in more cases.
|
||||
if mail["X-Original-From"].present?
|
||||
if mail[:reply_to] && mail[:reply_to].errors.blank?
|
||||
from_address, from_display_name =
|
||||
extract_from_fields_from_header(
|
||||
mail,
|
||||
:reply_to,
|
||||
comparison_headers: ["X-Original-From"],
|
||||
)
|
||||
return from_address, from_display_name if from_address
|
||||
end
|
||||
if mail[:x_original_from].present? && mail[:reply_to].present?
|
||||
original_from_address, _ =
|
||||
Email::Receiver.extract_email_address_and_name(mail[:x_original_from])
|
||||
reply_to_address, reply_to_name =
|
||||
Email::Receiver.extract_email_address_and_name(mail[:reply_to])
|
||||
return reply_to_address, reply_to_name if original_from_address == reply_to_address
|
||||
end
|
||||
|
||||
if mail[:from].errors.blank?
|
||||
from_address, from_display_name = extract_from_fields_from_header(mail, :from)
|
||||
return from_address, from_display_name if from_address
|
||||
end
|
||||
|
||||
return extract_from_address_and_name(mail.from) if mail.from.is_a? String
|
||||
|
||||
if mail.from.is_a? Mail::AddressContainer
|
||||
mail.from.each do |from|
|
||||
from_address, from_display_name = extract_from_address_and_name(from)
|
||||
return from_address, from_display_name if from_address
|
||||
end
|
||||
end
|
||||
|
||||
nil
|
||||
Email::Receiver.extract_email_address_and_name(mail[:from])
|
||||
rescue StandardError
|
||||
nil
|
||||
end
|
||||
|
||||
def extract_from_fields_from_header(mail_object, header, comparison_headers: [])
|
||||
mail_object[header].each do |address_field|
|
||||
from_address = address_field.address
|
||||
from_display_name = address_field.display_name&.to_s
|
||||
def self.extract_email_address_and_name_from_mailman(mail)
|
||||
list_address, _ = Email::Receiver.extract_email_address_and_name(mail[:list_post])
|
||||
list_address, _ =
|
||||
Email::Receiver.extract_email_address_and_name(mail[:x_beenthere]) if list_address.blank?
|
||||
|
||||
comparison_failed = false
|
||||
comparison_headers.each do |comparison_header|
|
||||
comparison_header_address = mail_object[comparison_header].to_s[/<([^>]+)>/, 1]
|
||||
if comparison_header_address != from_address
|
||||
comparison_failed = true
|
||||
break
|
||||
end
|
||||
return if list_address.blank?
|
||||
|
||||
# the CC header often includes the name of the sender
|
||||
address_to_name = mail[:cc]&.element&.addresses&.to_h { [_1.address, _1.name] } || {}
|
||||
|
||||
%i[from reply_to x_mailfrom x_original_from].each do |header|
|
||||
next if mail[header].blank?
|
||||
email, name = Email::Receiver.extract_email_address_and_name(mail[header])
|
||||
if email.present? && email != list_address
|
||||
return email, name.presence || address_to_name[email]
|
||||
end
|
||||
|
||||
next if comparison_failed
|
||||
next if !from_address&.include?("@")
|
||||
return from_address&.downcase, from_display_name&.strip
|
||||
end
|
||||
|
||||
[nil, nil]
|
||||
end
|
||||
|
||||
def extract_from_address_and_name(value)
|
||||
if value[";"]
|
||||
from_display_name, from_address = value.split(";")
|
||||
return from_address&.strip&.downcase, from_display_name&.strip
|
||||
def self.extract_email_address_and_name(value)
|
||||
begin
|
||||
# ensure the email header value is a string
|
||||
value = value.to_s
|
||||
# in embedded emails, converts [mailto:foo@bar.com] to <foo@bar.com>
|
||||
value = value.gsub(/\[mailto:([^\[\]]+?)\]/, "<\\1>")
|
||||
# 'mailto:' suffix isn't supported by Mail::Address parsing
|
||||
value = value.gsub("mailto:", "")
|
||||
# parse the email header value
|
||||
parsed = Mail::Address.new(value)
|
||||
# extract the email address and name
|
||||
mail = parsed.address.to_s.downcase.strip
|
||||
name = parsed.name.to_s.strip
|
||||
# ensure the email address is "valid"
|
||||
if mail.include?("@")
|
||||
# remove surrounding quotes from the name
|
||||
name = name[1...-1] if name.size > 2 && name[/\A(['"]).+(\1)\z/]
|
||||
# return the email address and name
|
||||
[mail, name]
|
||||
end
|
||||
rescue Mail::Field::ParseError, Mail::Field::IncompleteParseError => e
|
||||
# something went wrong parsing the email header value, return nil
|
||||
end
|
||||
|
||||
if value[/<[^>]+>/]
|
||||
from_address = value[/<([^>]+)>/, 1]
|
||||
from_display_name = value[/\A([^<]+)/, 1]
|
||||
end
|
||||
|
||||
if (from_address.blank? || !from_address["@"]) && value[/\[mailto:[^\]]+\]/]
|
||||
from_address = value[/\[mailto:([^\]]+)\]/, 1]
|
||||
from_display_name = value[/\A([^\[]+)/, 1]
|
||||
end
|
||||
|
||||
[from_address&.downcase, from_display_name&.strip]
|
||||
end
|
||||
|
||||
def subject
|
||||
|
@ -1089,24 +1084,28 @@ module Email
|
|||
end
|
||||
|
||||
def forwarded_email_create_replies(destination, user)
|
||||
embedded = Mail.new(embedded_email_raw)
|
||||
email, display_name = parse_from_field(embedded)
|
||||
forwarded_by_address, forwarded_by_name =
|
||||
Email::Receiver.extract_email_address_and_name(@mail[:from])
|
||||
|
||||
if forwarded_by_address && forwarded_by_name
|
||||
@forwarded_by_user = stage_sender_user(forwarded_by_address, forwarded_by_name)
|
||||
end
|
||||
|
||||
return false if email.blank? || !email["@"]
|
||||
email_address, display_name =
|
||||
parse_from_field(embedded_email, process_forwarded_emails: false)
|
||||
|
||||
return false if email_address.blank? || !email_address.include?("@")
|
||||
|
||||
post =
|
||||
forwarded_email_create_topic(
|
||||
destination: destination,
|
||||
user: user,
|
||||
raw: try_to_encode(embedded.decoded, "UTF-8").presence || embedded.to_s,
|
||||
title: embedded.subject.presence || subject,
|
||||
date: embedded.date,
|
||||
embedded_user: lambda { find_or_create_user(email, display_name) },
|
||||
raw: try_to_encode(embedded_email.decoded, "UTF-8").presence || embedded_email.to_s,
|
||||
title: embedded_email.subject.presence || subject,
|
||||
date: embedded_email.date,
|
||||
embedded_user: lambda { find_or_create_user(email_address, display_name) },
|
||||
)
|
||||
|
||||
return false unless post
|
||||
|
||||
if post.topic
|
||||
|
@ -1143,25 +1142,12 @@ module Email
|
|||
true
|
||||
end
|
||||
|
||||
def forwarded_by_sender
|
||||
@forwarded_by_sender ||= extract_from_fields_from_header(@mail, :from)
|
||||
end
|
||||
|
||||
def forwarded_by_address
|
||||
@forwarded_by_address ||= forwarded_by_sender&.first
|
||||
end
|
||||
|
||||
def forwarded_by_name
|
||||
@forwarded_by_name ||= forwarded_by_sender&.first
|
||||
end
|
||||
|
||||
def forwarded_email_quote_forwarded(destination, user)
|
||||
embedded = embedded_email_raw
|
||||
raw = <<~MD
|
||||
#{@before_embedded}
|
||||
|
||||
[quote]
|
||||
#{PlainTextToMarkdown.new(embedded).to_markdown}
|
||||
#{PlainTextToMarkdown.new(@embedded_email_raw).to_markdown}
|
||||
[/quote]
|
||||
MD
|
||||
|
||||
|
@ -1528,7 +1514,7 @@ module Email
|
|||
address_field.decoded
|
||||
email = address_field.address.downcase
|
||||
display_name = address_field.display_name.try(:to_s)
|
||||
next unless email["@"]
|
||||
next if !email.include?("@")
|
||||
|
||||
if should_invite?(email)
|
||||
user = User.find_by_email(email)
|
||||
|
|
|
@ -207,3 +207,56 @@ task "emails:test", [:email] => [:environment] do |_, args|
|
|||
Consider changing it to 'no' before performing any further troubleshooting.
|
||||
TEXT
|
||||
end
|
||||
|
||||
desc "run this to fix users associated to emails mirrored from a mailman mailing list"
|
||||
task "emails:fix_mailman_users" => :environment do
|
||||
if !SiteSetting.enable_staged_users
|
||||
puts "Please enable staged users first"
|
||||
exit 1
|
||||
end
|
||||
|
||||
def find_or_create_user(email, name)
|
||||
user = nil
|
||||
|
||||
User.transaction do
|
||||
unless user = User.find_by_email(email)
|
||||
username = UserNameSuggester.sanitize_username(name) if name.present?
|
||||
username = UserNameSuggester.suggest(username.presence || email)
|
||||
name = name.presence || User.suggest_name(email)
|
||||
|
||||
begin
|
||||
user = User.create!(email: email, username: username, name: name, staged: true)
|
||||
rescue PG::UniqueViolation, ActiveRecord::RecordNotUnique, ActiveRecord::RecordInvalid
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
user
|
||||
end
|
||||
|
||||
IncomingEmail
|
||||
.includes(:user, :post)
|
||||
.where("raw LIKE '%X-Mailman-Version: %'")
|
||||
.find_each do |ie|
|
||||
next unless ie.post.present?
|
||||
|
||||
mail = Mail.new(ie.raw)
|
||||
email, name = Email::Receiver.extract_email_address_and_name_from_mailman(mail)
|
||||
|
||||
if email.blank? || email == ie.user.email
|
||||
putc "."
|
||||
elsif new_owner = find_or_create_user(email, name)
|
||||
PostOwnerChanger.new(
|
||||
post_ids: [ie.post_id],
|
||||
topic_id: ie.post.topic_id,
|
||||
new_owner: new_owner,
|
||||
acting_user: Discourse.system_user,
|
||||
skip_revision: true,
|
||||
).change_owner!
|
||||
putc "#"
|
||||
else
|
||||
putc "X"
|
||||
end
|
||||
end
|
||||
nil
|
||||
end
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
Date: Tue, 16 May 2023 20:50:54 -0700
|
||||
From: Some One <some@one.com>
|
||||
Reply-To: Example users <list@ml.example.com>
|
||||
To: list@example.com
|
||||
Message-ID: <4460a72d7764ad03b5cecd65fe4c2fbd@foo.com>
|
||||
Subject: library 1.2.3 released!
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
X-MailFrom: some@one.com
|
||||
X-Mailman-Version: 2.3.4
|
||||
Precedence: list
|
||||
List-Id: Example users <list.ml.example.com>
|
||||
List-Owner: <mailto:list-owner@ml.example.com>
|
||||
List-Post: <mailto:list@ml.example.com>
|
||||
List-Subscribe: <mailto:list-join@ml.example.com>
|
||||
List-Unsubscribe: <mailto:list-leave@ml.example.com>
|
||||
|
||||
library version 1.2.3 has been released!
|
|
@ -0,0 +1,15 @@
|
|||
Date: Tue, 16 May 2023 20:50:54 -0700
|
||||
From: Some One via example <list@ml.example.com>
|
||||
Reply-To: some@one.com
|
||||
To: list@example.com
|
||||
Message-ID: <23e86ebe4d9f211967e48d284449c856@foo.com>
|
||||
Subject: library 1.2.3 released!
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
X-MailFrom: some@one.com
|
||||
X-Mailman-Version: 1.2.3
|
||||
X-BeenThere: list@ml.example.com
|
||||
Precedence: list
|
||||
|
||||
library version 1.2.3 has been released!
|
|
@ -0,0 +1,15 @@
|
|||
Date: Tue, 16 May 2023 20:50:54 -0700
|
||||
From: Some One via example <list@ml.example.com>
|
||||
To: list@example.com
|
||||
CC: Some One <some@one.com>
|
||||
Message-ID: <5a0d382572c511c24daf1558c02f6a19@foo.com>
|
||||
Subject: library 1.2.3 released!
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
X-MailFrom: some@one.com
|
||||
X-Mailman-Version: 2.2.2
|
||||
X-BeenThere: list@ml.example.com
|
||||
Precedence: list
|
||||
|
||||
library version 1.2.3 has been released!
|
|
@ -0,0 +1,14 @@
|
|||
Date: Tue, 16 May 2023 20:50:54 -0700
|
||||
From: Some One via example <list@ml.example.com>
|
||||
To: list@example.com
|
||||
Message-ID: <8df45354b9ca21a868023b2dd3296a8d@foo.com>
|
||||
Subject: library 1.2.3 released!
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
X-Original-From: some@one.com
|
||||
X-Mailman-Version: 1.2.3
|
||||
Precedence: list
|
||||
List-Post: <mailto:list@ml.example.com>
|
||||
|
||||
library version 1.2.3 has been released!
|
|
@ -7,7 +7,7 @@ Message-ID: <3848c3m98r439c348mc349@test.mailinglist.com>
|
|||
Mime-Version: 1.0
|
||||
Content-Type: text/plain
|
||||
Content-Transfer-Encoding: 7bit
|
||||
X-Original-From: Marston, John <johnmarston@reddeadtest.com>
|
||||
X-Original-From: "Marston, John" <johnmarston@reddeadtest.com>
|
||||
Precedence: list
|
||||
Mailing-list: list westernsupport@test.mailinglist.com; contact
|
||||
westernsupport+owners@test.mailinglist.com
|
||||
|
|
|
@ -218,14 +218,6 @@ RSpec.describe Email::Receiver do
|
|||
expect(IncomingEmail.last.error).to eq("RuntimeError")
|
||||
end
|
||||
|
||||
it "matches the correct user" do
|
||||
user = Fabricate(:user)
|
||||
email_log = Fabricate(:email_log, to_address: user.email, user: user, bounce_key: nil)
|
||||
email, name = Email::Receiver.new(email(:existing_user)).parse_from_field
|
||||
expect(email).to eq("existing@bar.com")
|
||||
expect(name).to eq("Foo Bar")
|
||||
end
|
||||
|
||||
it "strips null bytes from the subject" do
|
||||
expect do process(:null_byte_in_subject) end.to raise_error(
|
||||
Email::Receiver::BadDestinationAddress,
|
||||
|
@ -512,9 +504,8 @@ RSpec.describe Email::Receiver do
|
|||
)
|
||||
end
|
||||
|
||||
it "handles invalid from header" do
|
||||
expect { process(:invalid_from_1) }.to change { topic.posts.count }
|
||||
expect(topic.posts.last.raw).to eq("This email was sent with an invalid from header field.")
|
||||
it "raises a NoSenderDetectedError when the From header can't be parsed" do
|
||||
expect { process(:invalid_from_1) }.to raise_error(Email::Receiver::NoSenderDetectedError)
|
||||
end
|
||||
|
||||
it "raises a NoSenderDetectedError when the From header doesn't contain an email address" do
|
||||
|
@ -1996,6 +1987,38 @@ RSpec.describe Email::Receiver do
|
|||
end
|
||||
end
|
||||
|
||||
describe "mailman mirror" do
|
||||
fab!(:category) { Fabricate(:mailinglist_mirror_category) }
|
||||
|
||||
it "uses 'from' email address" do
|
||||
expect { process(:mailman_1) }.to change { Topic.count }
|
||||
user = Topic.last.user
|
||||
expect(user.email).to eq("some@one.com")
|
||||
expect(user.name).to eq("Some One")
|
||||
end
|
||||
|
||||
it "uses 'reply-to' email address" do
|
||||
expect { process(:mailman_2) }.to change { Topic.count }
|
||||
user = Topic.last.user
|
||||
expect(user.email).to eq("some@one.com")
|
||||
expect(user.name).to eq("Some")
|
||||
end
|
||||
|
||||
it "uses 'x-mailfrom' email address and name from CC" do
|
||||
expect { process(:mailman_3) }.to change { Topic.count }
|
||||
user = Topic.last.user
|
||||
expect(user.email).to eq("some@one.com")
|
||||
expect(user.name).to eq("Some One")
|
||||
end
|
||||
|
||||
it "uses 'x-original-from' email address" do
|
||||
expect { process(:mailman_4) }.to change { Topic.count }
|
||||
user = Topic.last.user
|
||||
expect(user.email).to eq("some@one.com")
|
||||
expect(user.name).to eq("Some")
|
||||
end
|
||||
end
|
||||
|
||||
describe "mailing list mirror" do
|
||||
fab!(:category) { Fabricate(:mailinglist_mirror_category) }
|
||||
|
||||
|
|
Loading…
Reference in New Issue