Special work to never include previous posts in parsed emails. Also adds a new
attempt to catch reply strings in different languages.
This commit is contained in:
Robin Ward 2013-07-24 14:22:32 -04:00
parent 0a7bbb08e1
commit 2319a824f8
5 changed files with 67 additions and 8 deletions

View File

@ -19,10 +19,18 @@ module Email
return Email::Receiver.results[:unprocessable] if @raw.blank?
@message = Mail::Message.new(@raw)
@body = EmailReplyParser.read(parse_body).visible_text
# First remove the known discourse stuff.
parse_body
return Email::Receiver.results[:unprocessable] if @body.blank?
# Then run the github EmailReplyParser on it in case we didn't catch it
@body = EmailReplyParser.read(@body).visible_text
discourse_email_parser
return Email::Receiver.results[:unprocessable] if @body.blank?
@reply_key = @message.to.first
# Extract the `reply_key` from the format the site has specified
@ -49,7 +57,8 @@ module Email
if @message.multipart?
@message.parts.each do |p|
if p.content_type =~ /text\/plain/
return p.body.to_s
@body = p.body.to_s
return @body
elsif p.content_type =~ /text\/html/
html = p.body.to_s
end
@ -58,10 +67,11 @@ module Email
html = @message.body.to_s if @message.content_type =~ /text\/html/
if html.present?
return scrub_html(html)
@body = scrub_html(html)
return @body
end
return @message.body.to_s.strip
@body = @message.body.to_s.strip
end
def scrub_html(html)
@ -76,8 +86,27 @@ module Email
return doc.xpath("//text()").text
end
def create_reply
# Truncates @body at the first line that looks like the start of quoted /
# previous content, then strips surrounding whitespace from what is left.
#
# A line is treated as a cut-off marker when it is any of:
#   * nothing but 3 to 80 dashes (optionally surrounded by whitespace);
#   * nothing but the localized `user_notifications.previous_discussion`
#     string (optionally surrounded by whitespace);
#   * heuristically, a "reply header": it contains a four-digit run, a
#     time-like `d:dd` fragment, and ends with a colon.
#
# The marker line and everything after it are dropped. If the very first line
# is a marker, @body becomes empty.
#
# Reads and reassigns @body. The return value is whatever `String#strip!`
# returns (nil when there was nothing to strip), so callers should read @body
# rather than rely on the return value.
def discourse_email_parser
  # Built once, outside the loop. The translation is escaped so that regex
  # metacharacters in a locale string (parentheses, dots, ...) match literally.
  previous_discussion = Regexp.new(
    "\\A\\s*" + Regexp.escape(I18n.t('user_notifications.previous_discussion')) + "\\s*\\Z"
  )

  kept = @body.each_line.take_while do |l|
    !(l =~ /\A\s*\-{3,80}\s*\z/ ||
      l =~ previous_discussion ||
      # This one might be controversial but so many reply lines have years, times and end with a colon.
      # Let's try it and see how well it works.
      # NOTE(review): `$` only matches right before "\n", so a header ending in
      # ":\r\n" would not be caught -- confirm bodies are LF-normalised upstream.
      (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/))
  end

  @body = kept.join
  @body.strip!
end
def create_reply
# Try to post the body as a reply
creator = PostCreator.new(email_log.user,
raw: @body,

View File

@ -21,7 +21,7 @@ describe Email::Receiver do
let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/multipart.eml") }
let(:receiver) { Email::Receiver.new(reply_below) }
it "does something" do
it "processes correctly" do
receiver.process
expect(receiver.body).to eq(
"So presumably all the quoted garbage and my (proper) signature will get
@ -33,18 +33,48 @@ stripped from my reply?")
let(:reply_below) { File.read("#{Rails.root}/spec/fixtures/emails/html_only.eml") }
let(:receiver) { Email::Receiver.new(reply_below) }
it "does something" do
it "processes correctly" do
receiver.process
expect(receiver.body).to eq("The EC2 instance - I've seen that there tends to be odd and " +
"unrecommended settings on the Bitnami installs that I've checked out.")
end
end
describe "it supports a dutch reply" do
  # Raw email text loaded from the dutch.eml fixture.
  let(:raw_email) { File.read("#{Rails.root}/spec/fixtures/emails/dutch.eml") }
  let(:receiver) { Email::Receiver.new(raw_email) }

  # After processing, only the reply text itself should remain in the body.
  it "processes correctly" do
    receiver.process
    expect(receiver.body).to eq("Dit is een antwoord in het Nederlands.")
  end
end
describe "if wrote is on a second line" do
  # Raw email text loaded from the multiline_wrote.eml fixture.
  let(:raw_email) { File.read("#{Rails.root}/spec/fixtures/emails/multiline_wrote.eml") }
  let(:receiver) { Email::Receiver.new(raw_email) }

  # After processing, only the short reply should remain in the body.
  it "processes correctly" do
    receiver.process
    expect(receiver.body).to eq("Thanks!")
  end
end
describe "remove previous discussion" do
  # Raw email text loaded from the previous.eml fixture.
  let(:raw_email) { File.read("#{Rails.root}/spec/fixtures/emails/previous.eml") }
  let(:receiver) { Email::Receiver.new(raw_email) }

  # After processing, the body should hold the new reply text only.
  it "processes correctly" do
    receiver.process
    expect(receiver.body).to eq("This will not include the previous discussion that is present in this email.")
  end
end
describe "multiple paragraphs" do
let(:paragraphs) { File.read("#{Rails.root}/spec/fixtures/emails/paragraphs.eml") }
let(:receiver) { Email::Receiver.new(paragraphs) }
it "does something" do
it "processes correctly" do
receiver.process
expect(receiver.body).to eq(
"Is there any reason the *old* candy can't be be kept in silos while the new candy

BIN
spec/fixtures/emails/dutch.eml vendored Normal file

Binary file not shown.

BIN
spec/fixtures/emails/multiline_wrote.eml vendored Normal file

Binary file not shown.

BIN
spec/fixtures/emails/previous.eml vendored Normal file

Binary file not shown.