Special work to never include previous posts in parsed emails. Also a new attempt

to catch reply strings in different languages.
This commit is contained in:
Robin Ward 2013-07-24 14:22:32 -04:00
parent 0a7bbb08e1
commit 2319a824f8
5 changed files with 148 additions and 8 deletions

View File

@ -19,10 +19,18 @@ module Email
return Email::Receiver.results[:unprocessable] if @raw.blank?
@message = Mail::Message.new(@raw)
@body = EmailReplyParser.read(parse_body).visible_text
# First remove the known discourse stuff.
parse_body
return Email::Receiver.results[:unprocessable] if @body.blank?
# Then run the github EmailReplyParser on it in case we didn't catch it
@body = EmailReplyParser.read(@body).visible_text
discourse_email_parser
return Email::Receiver.results[:unprocessable] if @body.blank?
@reply_key = @message.to.first
# Extract the `reply_key` from the format the site has specified
@ -49,7 +57,8 @@ module Email
if @message.multipart?
@message.parts.each do |p|
if p.content_type =~ /text\/plain/
return p.body.to_s
@body = p.body.to_s
return @body
elsif p.content_type =~ /text\/html/
html = p.body.to_s
end
@ -58,10 +67,11 @@ module Email
html = @message.body.to_s if @message.content_type =~ /text\/html/
if html.present?
return scrub_html(html)
@body = scrub_html(html)
return @body
end
return @message.body.to_s.strip
@body = @message.body.to_s.strip
end
def scrub_html(html)
@ -76,8 +86,27 @@ module Email
return doc.xpath("//text()").text
end
def create_reply
# Trims @body down to the text that precedes the first line that looks like
# the start of quoted / previous content, then strips surrounding whitespace.
#
# A line is treated as the cut-off point when it is any of:
#   * a divider of 3 to 80 dashes (optionally surrounded by whitespace),
#   * the translated 'user_notifications.previous_discussion' heading, or
#   * a probable reply attribution line: it contains a 4-digit number, a
#     clock time such as "10:23", and ends with a colon.
#
# The cut-off line itself is never kept, so a body that starts with a
# delimiter becomes empty rather than being reduced to the delimiter.
#
# Replaces @body and returns the new value.
def discourse_email_parser
# Built once, outside the loop. The translation is escaped so that regexp
# metacharacters in a locale string are matched literally.
previous_discussion = Regexp.new("\\A\\s*" + Regexp.escape(I18n.t('user_notifications.previous_discussion')) + "\\s*\\Z")
kept = @body.lines.take_while do |l|
# This last heuristic might be controversial, but many reply lines contain
# a year and a time and end with a colon.
!(l =~ /\A\s*\-{3,80}\s*\z/ ||
l =~ previous_discussion ||
(l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/))
end
@body = kept.join.strip
end
def create_reply
# Try to post the body as a reply
creator = PostCreator.new(email_log.user,
raw: @body,

View File

@ -21,7 +21,7 @@ describe Email::Receiver do
let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/multipart.eml") }
let(:receiver) { Email::Receiver.new(reply_below) }
it "does something" do
it "processes correctly" do
receiver.process
expect(receiver.body).to eq(
"So presumably all the quoted garbage and my (proper) signature will get
@ -33,18 +33,48 @@ stripped from my reply?")
let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/html_only.eml") }
let(:receiver) { Email::Receiver.new(reply_below) }
it "does something" do
it "processes correctly" do
receiver.process
expect(receiver.body).to eq("The EC2 instance - I've seen that there tends to be odd and " +
"unrecommended settings on the Bitnami installs that I've checked out.")
end
end
# Feeds the dutch.eml fixture through the receiver and expects only the
# sender's own sentence to remain as the body. The fixture's next line is a
# Dutch attribution line ("Op 18 juli 2013 10:23 schreef ...:").
describe "it supports a dutch reply" do
let(:dutch) { File.read("#{Rails.root}/spec/fixtures/emails/dutch.eml") }
let(:receiver) { Email::Receiver.new(dutch) }
it "processes correctly" do
receiver.process
expect(receiver.body).to eq("Dit is een antwoord in het Nederlands.")
end
end
# Feeds the multiline_wrote.eml fixture through the receiver and expects the
# body to be just "Thanks!". Per the describe name, this covers an attribution
# whose "wrote:" falls on a second line.
describe "if wrote is on a second line" do
let(:wrote) { File.read("#{Rails.root}/spec/fixtures/emails/multiline_wrote.eml") }
let(:receiver) { Email::Receiver.new(wrote) }
it "processes correctly" do
receiver.process
expect(receiver.body).to eq("Thanks!")
end
end
# Feeds the previous.eml fixture through the receiver and expects the body to
# be only the sender's sentence. In the fixture that sentence is followed by a
# dashed divider and a "Previous discussion" section.
describe "remove previous discussion" do
let(:previous) { File.read("#{Rails.root}/spec/fixtures/emails/previous.eml") }
let(:receiver) { Email::Receiver.new(previous) }
it "processes correctly" do
receiver.process
expect(receiver.body).to eq("This will not include the previous discussion that is present in this email.")
end
end
describe "multiple paragraphs" do
let(:paragraphs) { File.read("#{Rails.root}/spec/fixtures/emails/paragraphs.eml") }
let(:receiver) { Email::Receiver.new(paragraphs) }
it "does something" do
it "processes correctly" do
receiver.process
expect(receiver.body).to eq(
"Is there any reason the *old* candy can't be be kept in silos while the new candy

20
spec/fixtures/emails/dutch.eml vendored Normal file
View File

@ -0,0 +1,20 @@
Delivered-To: discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org
Received: by 10.194.216.104 with SMTP id op8csp80593wjc;
Wed, 24 Jul 2013 07:59:14 -0700 (PDT)
Return-Path: <walter.white@googlemail.com>
References: <topic/5043@discourse.org> <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
From: Walter White <walter.white@googlemail.com>
In-Reply-To: <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
Mime-Version: 1.0 (1.0)
Date: Wed, 24 Jul 2013 15:59:10 +0100
Message-ID: <4597127794206131679@unknownmsgid>
Subject: Re: [Discourse] new reply to your post in 'Crystal Blue'
To: walter via Discourse <discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org>
Content-Type: multipart/alternative; boundary=001a11c20edc15a39304e2432790
Dit is een antwoord in het Nederlands.
Op 18 juli 2013 10:23 schreef Sander Datema het volgende:
Dit is de originele post.

View File

@ -0,0 +1,23 @@
Delivered-To: discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org
Received: by 10.194.216.104 with SMTP id op8csp80593wjc;
Wed, 24 Jul 2013 07:59:14 -0700 (PDT)
Return-Path: <walter.white@googlemail.com>
References: <topic/5043@discourse.org> <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
From: Walter White <walter.white@googlemail.com>
In-Reply-To: <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
Mime-Version: 1.0 (1.0)
Date: Wed, 24 Jul 2013 15:59:10 +0100
Message-ID: <4597127794206131679@unknownmsgid>
Subject: Re: [Discourse] new reply to your post in 'Crystal Blue'
To: walter via Discourse <discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org>
Content-Type: multipart/alternative; boundary=001a11c20edc15a39304e2432790
Thanks!
On 24 Jul 2013, at 15:58, walter via Discourse <info@discourse.org>
wrote:
walter <http://discourse.org/users/walter> July 24
You look great today Walter.

38
spec/fixtures/emails/previous.eml vendored Normal file
View File

@ -0,0 +1,38 @@
Delivered-To: discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org
Received: by 10.194.216.104 with SMTP id op8csp80593wjc;
Wed, 24 Jul 2013 07:59:14 -0700 (PDT)
Return-Path: <walter.white@googlemail.com>
References: <topic/5043@discourse.org> <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
From: Walter White <walter.white@googlemail.com>
In-Reply-To: <51efeb9b36c34_66dc2dfce6811866@discourse.mail>
Mime-Version: 1.0 (1.0)
Date: Wed, 24 Jul 2013 15:59:10 +0100
Message-ID: <4597127794206131679@unknownmsgid>
Subject: Re: [Discourse] new reply to your post in 'Crystal Blue'
To: walter via Discourse <discourse-reply+cd480e301683c9902891f15968bf07a5@discourse.org>
Content-Type: multipart/alternative; boundary=001a11c20edc15a39304e2432790
This will not include the previous discussion that is present in this email.
------------------------------
Previous discussion
skylerwhite<http://discourse.org/users/skylerwhite> July 24
This is a reply.
fring <http://discourse.org/users/fring> July 24
This is an older reply.
hank_schrader <http://discourse.org/users/hank_schrader> July 24
Of course another reply here.
walterwhite <http://discourse.org/users/walterwhite> July 24
------------------------------
To respond, reply to this email or visit
http://discourse.org/t/crystal-blue/5043/10in
your browser.
To unsubscribe from these emails, visit your user
preferences<http://discourse.org/user_preferences>
.