From db9d998de3aa5e5dab04f88cdbe10d17bcbe5b20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Hanol?= Date: Fri, 19 May 2023 10:33:48 +0200 Subject: [PATCH] FIX: improve mailman email parsing (#21627) https://meta.discourse.org/t/improving-mailman-email-parsing/253041 When mirroring a public mailling list which uses mailman, there were some cases where the incoming email was not associated to the proper user. As it happens, for various (undertermined) reasons, the email from the sender is often not in the `From` header but can be in any of the following headers: `Reply-To`, `CC`, `X-Original-From`, `X-MailFrom`. It might be in other headers as well, but those were the ones we found the most reliable. --- lib/email/receiver.rb | 164 ++++++++---------- lib/tasks/emails.rake | 53 ++++++ spec/fixtures/emails/mailman_1.eml | 19 ++ spec/fixtures/emails/mailman_2.eml | 15 ++ spec/fixtures/emails/mailman_3.eml | 15 ++ spec/fixtures/emails/mailman_4.eml | 14 ++ ..._different_to_from_quoted_display_name.eml | 2 +- spec/lib/email/receiver_spec.rb | 45 +++-- 8 files changed, 226 insertions(+), 101 deletions(-) create mode 100644 spec/fixtures/emails/mailman_1.eml create mode 100644 spec/fixtures/emails/mailman_2.eml create mode 100644 spec/fixtures/emails/mailman_3.eml create mode 100644 spec/fixtures/emails/mailman_4.eml diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb index 1d5a044d35f..93ecc91aa57 100644 --- a/lib/email/receiver.rb +++ b/lib/email/receiver.rb @@ -107,7 +107,7 @@ module Email Email::Validator.ensure_valid!(@mail) - @from_email, @from_display_name = parse_from_field + @from_email, @from_display_name = parse_from_field(@mail) @from_user = User.find_by_email(@from_email) @incoming_email = create_incoming_email @@ -660,14 +660,15 @@ module Email reply.split(reply_above_line_regex)[0] end - def parse_from_field(mail = nil) - mail ||= @mail - + def parse_from_field(mail, process_forwarded_emails: true) if email_log.present? email = email_log.to_address || email_log.user&.email return email, email_log.user&.username elsif mail.bounced? - Array.wrap(mail.final_recipient).each { |from| return extract_from_address_and_name(from) } + # "Final-Recipient" has a specific format ( ;
) + # cf. https://www.ietf.org/rfc/rfc2298.html#section-3.2.4 + address_type, generic_address = mail.final_recipient.to_s.split(";").map { _1.to_s.strip } + return generic_address, nil if generic_address.include?("@") && address_type == "rfc822" end return unless mail[:from] @@ -675,87 +676,81 @@ module Email # For forwarded emails, where the from address matches a group incoming # email, we want to use the from address of the original email sender, # which we can extract from embedded_email_raw. - if has_been_forwarded? - if mail[:from].to_s =~ group_incoming_emails_regex && embedded_email[:from].errors.blank? - from_address, from_display_name = extract_from_fields_from_header(embedded_email, :from) - return from_address, from_display_name if from_address + if process_forwarded_emails && has_been_forwarded? + if mail[:from].to_s =~ group_incoming_emails_regex + if embedded_email && embedded_email[:from].errors.blank? + from_address, from_display_name = + Email::Receiver.extract_email_address_and_name(embedded_email[:from]) + return from_address, from_display_name if from_address + end end end + # extract proper sender when using mailman mailing list + if mail[:x_mailman_version].present? + address, name = Email::Receiver.extract_email_address_and_name_from_mailman(mail) + return address, name if address + end + # For now we are only using the Reply-To header if the email has # been forwarded via Google Groups, which is why we are checking the # X-Original-From header too. In future we may want to use the Reply-To # header in more cases. - if mail["X-Original-From"].present? - if mail[:reply_to] && mail[:reply_to].errors.blank? - from_address, from_display_name = - extract_from_fields_from_header( - mail, - :reply_to, - comparison_headers: ["X-Original-From"], - ) - return from_address, from_display_name if from_address - end + if mail[:x_original_from].present? && mail[:reply_to].present? + original_from_address, _ = + Email::Receiver.extract_email_address_and_name(mail[:x_original_from]) + reply_to_address, reply_to_name = + Email::Receiver.extract_email_address_and_name(mail[:reply_to]) + return reply_to_address, reply_to_name if original_from_address == reply_to_address end - if mail[:from].errors.blank? - from_address, from_display_name = extract_from_fields_from_header(mail, :from) - return from_address, from_display_name if from_address - end - - return extract_from_address_and_name(mail.from) if mail.from.is_a? String - - if mail.from.is_a? Mail::AddressContainer - mail.from.each do |from| - from_address, from_display_name = extract_from_address_and_name(from) - return from_address, from_display_name if from_address - end - end - - nil + Email::Receiver.extract_email_address_and_name(mail[:from]) rescue StandardError nil end - def extract_from_fields_from_header(mail_object, header, comparison_headers: []) - mail_object[header].each do |address_field| - from_address = address_field.address - from_display_name = address_field.display_name&.to_s + def self.extract_email_address_and_name_from_mailman(mail) + list_address, _ = Email::Receiver.extract_email_address_and_name(mail[:list_post]) + list_address, _ = + Email::Receiver.extract_email_address_and_name(mail[:x_beenthere]) if list_address.blank? - comparison_failed = false - comparison_headers.each do |comparison_header| - comparison_header_address = mail_object[comparison_header].to_s[/<([^>]+)>/, 1] - if comparison_header_address != from_address - comparison_failed = true - break - end + return if list_address.blank? + + # the CC header often includes the name of the sender + address_to_name = mail[:cc]&.element&.addresses&.to_h { [_1.address, _1.name] } || {} + + %i[from reply_to x_mailfrom x_original_from].each do |header| + next if mail[header].blank? + email, name = Email::Receiver.extract_email_address_and_name(mail[header]) + if email.present? && email != list_address + return email, name.presence || address_to_name[email] end - - next if comparison_failed - next if !from_address&.include?("@") - return from_address&.downcase, from_display_name&.strip end - - [nil, nil] end - def extract_from_address_and_name(value) - if value[";"] - from_display_name, from_address = value.split(";") - return from_address&.strip&.downcase, from_display_name&.strip + def self.extract_email_address_and_name(value) + begin + # ensure the email header value is a string + value = value.to_s + # in embedded emails, converts [mailto:foo@bar.com] to + value = value.gsub(/\[mailto:([^\[\]]+?)\]/, "<\\1>") + # 'mailto:' suffix isn't supported by Mail::Address parsing + value = value.gsub("mailto:", "") + # parse the email header value + parsed = Mail::Address.new(value) + # extract the email address and name + mail = parsed.address.to_s.downcase.strip + name = parsed.name.to_s.strip + # ensure the email address is "valid" + if mail.include?("@") + # remove surrounding quotes from the name + name = name[1...-1] if name.size > 2 && name[/\A(['"]).+(\1)\z/] + # return the email address and name + [mail, name] + end + rescue Mail::Field::ParseError, Mail::Field::IncompleteParseError => e + # something went wrong parsing the email header value, return nil end - - if value[/<[^>]+>/] - from_address = value[/<([^>]+)>/, 1] - from_display_name = value[/\A([^<]+)/, 1] - end - - if (from_address.blank? || !from_address["@"]) && value[/\[mailto:[^\]]+\]/] - from_address = value[/\[mailto:([^\]]+)\]/, 1] - from_display_name = value[/\A([^\[]+)/, 1] - end - - [from_address&.downcase, from_display_name&.strip] end def subject @@ -1089,24 +1084,28 @@ module Email end def forwarded_email_create_replies(destination, user) - embedded = Mail.new(embedded_email_raw) - email, display_name = parse_from_field(embedded) + forwarded_by_address, forwarded_by_name = + Email::Receiver.extract_email_address_and_name(@mail[:from]) if forwarded_by_address && forwarded_by_name @forwarded_by_user = stage_sender_user(forwarded_by_address, forwarded_by_name) end - return false if email.blank? || !email["@"] + email_address, display_name = + parse_from_field(embedded_email, process_forwarded_emails: false) + + return false if email_address.blank? || !email_address.include?("@") post = forwarded_email_create_topic( destination: destination, user: user, - raw: try_to_encode(embedded.decoded, "UTF-8").presence || embedded.to_s, - title: embedded.subject.presence || subject, - date: embedded.date, - embedded_user: lambda { find_or_create_user(email, display_name) }, + raw: try_to_encode(embedded_email.decoded, "UTF-8").presence || embedded_email.to_s, + title: embedded_email.subject.presence || subject, + date: embedded_email.date, + embedded_user: lambda { find_or_create_user(email_address, display_name) }, ) + return false unless post if post.topic @@ -1143,25 +1142,12 @@ module Email true end - def forwarded_by_sender - @forwarded_by_sender ||= extract_from_fields_from_header(@mail, :from) - end - - def forwarded_by_address - @forwarded_by_address ||= forwarded_by_sender&.first - end - - def forwarded_by_name - @forwarded_by_name ||= forwarded_by_sender&.first - end - def forwarded_email_quote_forwarded(destination, user) - embedded = embedded_email_raw raw = <<~MD #{@before_embedded} [quote] - #{PlainTextToMarkdown.new(embedded).to_markdown} + #{PlainTextToMarkdown.new(@embedded_email_raw).to_markdown} [/quote] MD @@ -1528,7 +1514,7 @@ module Email address_field.decoded email = address_field.address.downcase display_name = address_field.display_name.try(:to_s) - next unless email["@"] + next if !email.include?("@") if should_invite?(email) user = User.find_by_email(email) diff --git a/lib/tasks/emails.rake b/lib/tasks/emails.rake index c9042d8dd38..bfa4313071e 100644 --- a/lib/tasks/emails.rake +++ b/lib/tasks/emails.rake @@ -207,3 +207,56 @@ task "emails:test", [:email] => [:environment] do |_, args| Consider changing it to 'no' before performing any further troubleshooting. TEXT end + +desc "run this to fix users associated to emails mirrored from a mailman mailing list" +task "emails:fix_mailman_users" => :environment do + if !SiteSetting.enable_staged_users + puts "Please enable staged users first" + exit 1 + end + + def find_or_create_user(email, name) + user = nil + + User.transaction do + unless user = User.find_by_email(email) + username = UserNameSuggester.sanitize_username(name) if name.present? + username = UserNameSuggester.suggest(username.presence || email) + name = name.presence || User.suggest_name(email) + + begin + user = User.create!(email: email, username: username, name: name, staged: true) + rescue PG::UniqueViolation, ActiveRecord::RecordNotUnique, ActiveRecord::RecordInvalid + end + end + end + + user + end + + IncomingEmail + .includes(:user, :post) + .where("raw LIKE '%X-Mailman-Version: %'") + .find_each do |ie| + next unless ie.post.present? + + mail = Mail.new(ie.raw) + email, name = Email::Receiver.extract_email_address_and_name_from_mailman(mail) + + if email.blank? || email == ie.user.email + putc "." + elsif new_owner = find_or_create_user(email, name) + PostOwnerChanger.new( + post_ids: [ie.post_id], + topic_id: ie.post.topic_id, + new_owner: new_owner, + acting_user: Discourse.system_user, + skip_revision: true, + ).change_owner! + putc "#" + else + putc "X" + end + end + nil +end diff --git a/spec/fixtures/emails/mailman_1.eml b/spec/fixtures/emails/mailman_1.eml new file mode 100644 index 00000000000..cdfd5bb891d --- /dev/null +++ b/spec/fixtures/emails/mailman_1.eml @@ -0,0 +1,19 @@ +Date: Tue, 16 May 2023 20:50:54 -0700 +From: Some One +Reply-To: Example users +To: list@example.com +Message-ID: <4460a72d7764ad03b5cecd65fe4c2fbd@foo.com> +Subject: library 1.2.3 released! +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 7bit +X-MailFrom: some@one.com +X-Mailman-Version: 2.3.4 +Precedence: list +List-Id: Example users +List-Owner: +List-Post: +List-Subscribe: +List-Unsubscribe: + +library version 1.2.3 has been released! \ No newline at end of file diff --git a/spec/fixtures/emails/mailman_2.eml b/spec/fixtures/emails/mailman_2.eml new file mode 100644 index 00000000000..907aaf131c2 --- /dev/null +++ b/spec/fixtures/emails/mailman_2.eml @@ -0,0 +1,15 @@ +Date: Tue, 16 May 2023 20:50:54 -0700 +From: Some One via example +Reply-To: some@one.com +To: list@example.com +Message-ID: <23e86ebe4d9f211967e48d284449c856@foo.com> +Subject: library 1.2.3 released! +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 7bit +X-MailFrom: some@one.com +X-Mailman-Version: 1.2.3 +X-BeenThere: list@ml.example.com +Precedence: list + +library version 1.2.3 has been released! \ No newline at end of file diff --git a/spec/fixtures/emails/mailman_3.eml b/spec/fixtures/emails/mailman_3.eml new file mode 100644 index 00000000000..60d0f7e7b6a --- /dev/null +++ b/spec/fixtures/emails/mailman_3.eml @@ -0,0 +1,15 @@ +Date: Tue, 16 May 2023 20:50:54 -0700 +From: Some One via example +To: list@example.com +CC: Some One +Message-ID: <5a0d382572c511c24daf1558c02f6a19@foo.com> +Subject: library 1.2.3 released! +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 7bit +X-MailFrom: some@one.com +X-Mailman-Version: 2.2.2 +X-BeenThere: list@ml.example.com +Precedence: list + +library version 1.2.3 has been released! \ No newline at end of file diff --git a/spec/fixtures/emails/mailman_4.eml b/spec/fixtures/emails/mailman_4.eml new file mode 100644 index 00000000000..7875ed71f1b --- /dev/null +++ b/spec/fixtures/emails/mailman_4.eml @@ -0,0 +1,14 @@ +Date: Tue, 16 May 2023 20:50:54 -0700 +From: Some One via example +To: list@example.com +Message-ID: <8df45354b9ca21a868023b2dd3296a8d@foo.com> +Subject: library 1.2.3 released! +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 7bit +X-Original-From: some@one.com +X-Mailman-Version: 1.2.3 +Precedence: list +List-Post: + +library version 1.2.3 has been released! \ No newline at end of file diff --git a/spec/fixtures/emails/reply_to_different_to_from_quoted_display_name.eml b/spec/fixtures/emails/reply_to_different_to_from_quoted_display_name.eml index f74499707c5..4b6cefecda9 100644 --- a/spec/fixtures/emails/reply_to_different_to_from_quoted_display_name.eml +++ b/spec/fixtures/emails/reply_to_different_to_from_quoted_display_name.eml @@ -7,7 +7,7 @@ Message-ID: <3848c3m98r439c348mc349@test.mailinglist.com> Mime-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: 7bit -X-Original-From: Marston, John +X-Original-From: "Marston, John" Precedence: list Mailing-list: list westernsupport@test.mailinglist.com; contact westernsupport+owners@test.mailinglist.com diff --git a/spec/lib/email/receiver_spec.rb b/spec/lib/email/receiver_spec.rb index 4910880bc81..6b67176be1a 100644 --- a/spec/lib/email/receiver_spec.rb +++ b/spec/lib/email/receiver_spec.rb @@ -218,14 +218,6 @@ RSpec.describe Email::Receiver do expect(IncomingEmail.last.error).to eq("RuntimeError") end - it "matches the correct user" do - user = Fabricate(:user) - email_log = Fabricate(:email_log, to_address: user.email, user: user, bounce_key: nil) - email, name = Email::Receiver.new(email(:existing_user)).parse_from_field - expect(email).to eq("existing@bar.com") - expect(name).to eq("Foo Bar") - end - it "strips null bytes from the subject" do expect do process(:null_byte_in_subject) end.to raise_error( Email::Receiver::BadDestinationAddress, @@ -512,9 +504,8 @@ RSpec.describe Email::Receiver do ) end - it "handles invalid from header" do - expect { process(:invalid_from_1) }.to change { topic.posts.count } - expect(topic.posts.last.raw).to eq("This email was sent with an invalid from header field.") + it "raises a NoSenderDetectedError when the From header can't be parsed" do + expect { process(:invalid_from_1) }.to raise_error(Email::Receiver::NoSenderDetectedError) end it "raises a NoSenderDetectedError when the From header doesn't contain an email address" do @@ -1996,6 +1987,38 @@ RSpec.describe Email::Receiver do end end + describe "mailman mirror" do + fab!(:category) { Fabricate(:mailinglist_mirror_category) } + + it "uses 'from' email address" do + expect { process(:mailman_1) }.to change { Topic.count } + user = Topic.last.user + expect(user.email).to eq("some@one.com") + expect(user.name).to eq("Some One") + end + + it "uses 'reply-to' email address" do + expect { process(:mailman_2) }.to change { Topic.count } + user = Topic.last.user + expect(user.email).to eq("some@one.com") + expect(user.name).to eq("Some") + end + + it "uses 'x-mailfrom' email address and name from CC" do + expect { process(:mailman_3) }.to change { Topic.count } + user = Topic.last.user + expect(user.email).to eq("some@one.com") + expect(user.name).to eq("Some One") + end + + it "uses 'x-original-from' email address" do + expect { process(:mailman_4) }.to change { Topic.count } + user = Topic.last.user + expect(user.email).to eq("some@one.com") + expect(user.name).to eq("Some") + end + end + describe "mailing list mirror" do fab!(:category) { Fabricate(:mailinglist_mirror_category) }