FIX: correctly extract body and/or reply from exchange emails (#30512)
When receiving emails sent with Exchange, we look for some markers to identify the body of the mail and the reply (a.k.a. the previous email).
For some reason, those markers aren't 100% reliable, and sometimes only one of them is present.
The commit 20ba54d536
introduced the bug because the `HTML_EXTRACTERS` regex for exchange looks for either `messageBodySection` or `messageReplySection` but we were only using the `reply` section. So if an email had only the `body` section, it would not be correctly extracted.
This commit handles the cases where either one of them is missing and uses the other one as the actual "reply". When both are present, it correctly elides the "reply" section.
This commit is contained in:
parent
9497a6165f
commit
d523c37057
|
@ -564,10 +564,21 @@ module Email
|
|||
end
|
||||
|
||||
def extract_from_exchange(doc)
|
||||
# Exchange is using the 'messageReplySection' class for forwarded emails
|
||||
# And 'messageBodySection' for the actual email
|
||||
elided = doc.css("div[name='messageReplySection']").remove
|
||||
to_markdown(doc.css("div[name='messageReplySection']").to_html, elided.to_html)
|
||||
# Exchange is using 'messageReplySection' for forwarded emails and 'messageBodySection' for the actual email
|
||||
reply = doc.css("div[name='messageReplySection']")
|
||||
body = doc.css("div[name='messageBodySection']")
|
||||
|
||||
if reply.present? && body.present?
|
||||
elided = doc.css("div[name='messageReplySection']").remove
|
||||
body = doc.css("div[name='messageBodySection']")
|
||||
to_markdown(body.to_html, elided.to_html)
|
||||
elsif reply.present?
|
||||
to_markdown(reply.to_html, "")
|
||||
elsif body.present?
|
||||
to_markdown(body.to_html, "")
|
||||
else
|
||||
to_markdown(doc.to_html, "")
|
||||
end
|
||||
end
|
||||
|
||||
def extract_from_apple_mail(doc)
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
Return-Path: <discourse@bar.com>
|
||||
From: Foo Bar <discourse@bar.com>
|
||||
To: alt+4f97315cc828096c9cb34c6f1a0d6fe8@bar.com
|
||||
Date: Fri, 15 Jan 2017 00:12:43 +0100
|
||||
Message-ID: <180@foo.bar.mail>
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div>
|
||||
<div name="messageBodySection">
|
||||
<p>This is the <b>body</b> of the email.</p>
|
||||
</div>
|
||||
</div>
|
|
@ -0,0 +1,17 @@
|
|||
Return-Path: <discourse@bar.com>
|
||||
From: Foo Bar <discourse@bar.com>
|
||||
To: alt+4f97315cc828096c9cb34c6f1a0d6fe8@bar.com
|
||||
Date: Fri, 15 Jan 2017 00:12:43 +0100
|
||||
Message-ID: <180@foo.bar.mail>
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div>
|
||||
<div name="messageBodySection">
|
||||
<p>This is the <b>body</b> of the email.</p>
|
||||
</div>
|
||||
<div name="messageReplySection">
|
||||
<p>This is the <i>reply</i>!</p>
|
||||
</div>
|
||||
</div>
|
|
@ -0,0 +1,14 @@
|
|||
Return-Path: <discourse@bar.com>
|
||||
From: Foo Bar <discourse@bar.com>
|
||||
To: alt+4f97315cc828096c9cb34c6f1a0d6fe8@bar.com
|
||||
Date: Fri, 15 Jan 2017 00:12:43 +0100
|
||||
Message-ID: <180@foo.bar.mail>
|
||||
Mime-Version: 1.0
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div>
|
||||
<div name="messageReplySection">
|
||||
<p>This is the <b>body !!</b> of the email.</p>
|
||||
</div>
|
||||
</div>
|
|
@ -415,9 +415,43 @@ RSpec.describe Email::Receiver do
|
|||
it "automatically elides gmail quotes" do
|
||||
SiteSetting.always_show_trimmed_content = true
|
||||
expect { process(:gmail_html_reply) }.to change { topic.posts.count }
|
||||
expect(topic.posts.last.raw).to eq(
|
||||
"This is a **GMAIL** reply ;)\n\n<details class='elided'>\n<summary title='Show trimmed content'>···</summary>\n\nThis is the *elided* part!\n\n</details>",
|
||||
)
|
||||
expect(topic.posts.last.raw).to eq <<~MD.strip
|
||||
This is a **GMAIL** reply ;)
|
||||
|
||||
<details class='elided'>
|
||||
<summary title='Show trimmed content'>···</summary>
|
||||
|
||||
This is the *elided* part!
|
||||
|
||||
</details>
|
||||
MD
|
||||
end
|
||||
|
||||
it "correctly extracts body from exchange emails" do
|
||||
SiteSetting.always_show_trimmed_content = true
|
||||
expect { process(:exchange_html_body) }.to change { topic.posts.count }
|
||||
expect(topic.posts.last.raw).to eq("This is the **body** of the email.")
|
||||
end
|
||||
|
||||
it "correctly extracts reply from exchange emails" do
|
||||
SiteSetting.always_show_trimmed_content = true
|
||||
expect { process(:exchange_html_reply) }.to change { topic.posts.count }
|
||||
expect(topic.posts.last.raw).to eq("This is the **body !!** of the email.")
|
||||
end
|
||||
|
||||
it "correctly extracts body & reply from exchange emails" do
|
||||
SiteSetting.always_show_trimmed_content = true
|
||||
expect { process(:exchange_html_body_and_reply) }.to change { topic.posts.count }
|
||||
expect(topic.posts.last.raw).to eq <<~MD.strip
|
||||
This is the **body** of the email.
|
||||
|
||||
<details class='elided'>
|
||||
<summary title='Show trimmed content'>···</summary>
|
||||
|
||||
This is the *reply*!
|
||||
|
||||
</details>
|
||||
MD
|
||||
end
|
||||
|
||||
it "doesn't process email with same message-id more than once" do
|
||||
|
|
Loading…
Reference in New Issue