FEATURE: escape HTML when cooking plaintext emails

This commit is contained in:
Gerhard Schlager 2017-11-15 16:39:29 +01:00
parent cef64e8f03
commit 9207dee69a
5 changed files with 201 additions and 47 deletions

View File

@ -19,7 +19,7 @@ class PostAnalyzer
return raw if cook_method == Post.cook_methods[:raw_html]
if cook_method == Post.cook_methods[:email]
cooked = EmailCook.new(raw).cook
cooked = EmailCook.new(raw).cook(opts)
else
cooked = PrettyText.cook(raw, opts)
end

View File

@ -38,6 +38,11 @@ module Email
attr_reader :mail
attr_reader :message_id
# Returns the enum of body formats a received email can be classified as
# (plaintext: 1, markdown: 2). The Enum instance is built on first use and
# cached in @formats for later calls.
def self.formats
  return @formats if @formats

  @formats = Enum.new(plaintext: 1, markdown: 2)
end
def initialize(mail_string)
raise EmptyEmailError if mail_string.blank?
@staged_users = []
@ -236,9 +241,9 @@ module Email
end
if text.blank? || (SiteSetting.incoming_email_prefer_html && markdown.present?)
return [markdown, elided_markdown]
return [markdown, elided_markdown, Receiver::formats[:markdown]]
else
return [text, elided_text]
return [text, elided_text, Receiver::formats[:plaintext]]
end
end

View File

@ -1,13 +1,19 @@
# A very simple formatter for imported emails
require_dependency 'pretty_text'
# A very simple formatter for imported emails
class EmailCook
def self.url_regexp
/((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/
@url_regexp ||= /((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/
end
# Regexp that splits a raw email post into its marked sections.
#
# Capture 1 is the text between the [plaintext] / [/plaintext] marker lines,
# capture 2 the optional [attachments] section and capture 3 the optional
# [elided] section. The /m flag lets `.` span newlines so each section may
# contain several lines. The compiled pattern is cached in @raw_regexp.
def self.raw_regexp
  return @raw_regexp if @raw_regexp

  @raw_regexp = /^\[plaintext\]$\n(.*)\n^\[\/plaintext\]$(?:\s^\[attachments\]$\n(.*)\n^\[\/attachments\]$)?(?:\s^\[elided\]$\n(.*)\n^\[\/elided\]$)?/m
end
# Stores the raw post text and splits it into its sections.
#
# @body, @attachment_html and @elided receive the three captures of
# EmailCook.raw_regexp for the first match in raw; all three stay nil when
# raw contains no match.
def initialize(raw)
  @raw = raw
  sections = @raw.scan(EmailCook.raw_regexp).first
  @body, @attachment_html, @elided = sections
end
def add_quote(result, buffer)
@ -17,53 +23,62 @@ class EmailCook
end
end
def link_string!(str)
str.scan(EmailCook.url_regexp).each do |m|
def link_string!(line, unescaped_line)
unescaped_line = unescaped_line.strip
unescaped_line.scan(EmailCook.url_regexp).each do |m|
url = m[0]
if str.strip == url
if unescaped_line == url
# this could be oneboxed
val = %|<a href="#{url}" class="onebox" target="_blank">#{url}</a>|
else
val = %|<a href="#{url}">#{url}</a>|
end
str.gsub!(url, val)
line.gsub!(url, val)
end
end
def cook
def htmlify(text)
result = ""
in_text = false
in_quote = false
quote_buffer = ""
@raw.each_line do |l|
text.each_line do |line|
if l =~ /^\s*>/
if line =~ /^\s*>/
in_quote = true
link_string!(l)
quote_buffer << l.sub(/^[\s>]*/, '') << "<br>"
line.sub!(/^[\s>]*/, '')
unescaped_line = line
line = CGI.escapeHTML(line)
link_string!(line, unescaped_line)
quote_buffer << line << "<br>"
elsif in_quote
add_quote(result, quote_buffer)
quote_buffer = ""
in_quote = false
else
sz = l.size
sz = line.size
link_string!(l)
result << l
unescaped_line = line
line = CGI.escapeHTML(line)
link_string!(line, unescaped_line)
if sz < 60
result << "<br>"
if in_text
if in_text && line == "\n"
result << "<br>"
end
result << line
result << "<br>"
in_text = false
else
result << line
in_text = true
end
end
@ -77,4 +92,14 @@ class EmailCook
result
end
# Produces the cooked HTML for the raw text given to the constructor.
#
# opts is only used on the fallback path, where it is handed to
# PrettyText.cook together with the untouched raw text.
def cook(opts = {})
  # fallback to PrettyText if we failed to detect a body
  return PrettyText.cook(@raw, opts) if @body.nil?

  html = htmlify(@body)

  # the attachment section is appended as-is, without going through htmlify
  if @attachment_html.present?
    html << "\n<br>" << @attachment_html
  end

  # the elided section is htmlified and then wrapped by Email::Receiver.elided_html
  if @elided.present?
    html << "\n<br><br>" << Email::Receiver.elided_html(htmlify(@elided))
  end

  html
end
end

View File

@ -1,45 +1,164 @@
require 'rails_helper'
require 'email_cook'
require 'pretty_text'
describe EmailCook do
# Raw text without a [plaintext] section must produce the same output as PrettyText.cook.
it "falls back to PrettyText when there is no [plaintext] in raw" do
  raw = "**Hello world!**"
  expect(cook(raw)).to eq(PrettyText.cook(raw))
end
it 'adds linebreaks to short lines' do
expect(EmailCook.new("hello\nworld\n").cook).to eq("hello\n<br>world\n<br>")
it "adds linebreaks to short lines" do
raw = plaintext("hello\nworld\n")
expect(cook(raw)).to eq("hello\n<br>world\n<br>")
end
it "doesn't add linebreaks to long lines" do
long = <<LONG_EMAIL
Hello,
long = plaintext(<<~LONG_EMAIL)
Hello,
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
Vestibulum feugiat mi vitae turpis tempor dignissim.
LONG_EMAIL
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
Vestibulum feugiat mi vitae turpis tempor dignissim.
LONG_EMAIL
long_cooked = <<LONG_COOKED
Hello,
<br>
<br>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
Vestibulum feugiat mi vitae turpis tempor dignissim.
<br><br>
LONG_COOKED
expect(EmailCook.new(long).cook).to eq(long_cooked.strip)
long_cooked = <<~LONG_COOKED.strip!
Hello,
<br>
<br>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
Vestibulum feugiat mi vitae turpis tempor dignissim.
<br>
LONG_COOKED
expect(cook(long)).to eq(long_cooked)
end
it 'creates oneboxed link when the line contains only a link' do
expect(EmailCook.new("https://www.eviltrout.com").cook).to eq('<a href="https://www.eviltrout.com" class="onebox" target="_blank">https://www.eviltrout.com</a><br>')
it "replaces a blank line with 2 linebreaks" do
long = plaintext(<<~LONG_EMAIL)
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus.
Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
Vestibulum feugiat mi vitae turpis tempor dignissim.
Stet clita kasd gubergren.
LONG_EMAIL
long_cooked = <<~LONG_COOKED.strip!
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus.
<br>Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
<br>
<br>Vestibulum feugiat mi vitae turpis tempor dignissim.
<br>
<br>Stet clita kasd gubergren.
<br>
LONG_COOKED
expect(cook(long)).to eq(long_cooked)
end
it 'autolinks without the beginning of a line' do
expect(EmailCook.new("my site: https://www.eviltrout.com").cook).to eq('my site: <a href="https://www.eviltrout.com">https://www.eviltrout.com</a><br>')
it "escapes HTML" do
long = plaintext(<<~LONG_EMAIL)
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
<form name="f1" method="post" action="test.html" onsubmit="javascript:showAlert()">
<input type="submit" name="submit" value="Click this button" />
</form>
Nunc convallis volutpat risus.
LONG_EMAIL
long_cooked = <<~LONG_COOKED.strip!
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
<br>
<br>&lt;form name=&quot;f1&quot; method=&quot;post&quot; action=&quot;test.html&quot; onsubmit=&quot;javascript:showAlert()&quot;&gt;
&lt;input type=&quot;submit&quot; name=&quot;submit&quot; value=&quot;Click this button&quot; /&gt;
&lt;/form&gt;
<br>
<br>Nunc convallis volutpat risus.
<br>
LONG_COOKED
expect(cook(long)).to eq(long_cooked)
end
it 'autolinks without the end of a line' do
expect(EmailCook.new("https://www.eviltrout.com is my site").cook).to eq('<a href="https://www.eviltrout.com">https://www.eviltrout.com</a> is my site<br>')
it "creates oneboxed link when the line contains only a link" do
raw = plaintext("https://www.eviltrout.com")
expect(cook(raw)).to eq('<a href="https://www.eviltrout.com" class="onebox" target="_blank">https://www.eviltrout.com</a><br>')
end
it 'links even within a quote' do
expect(EmailCook.new("> https://www.eviltrout.com").cook).to eq('<blockquote><a href="https://www.eviltrout.com">https://www.eviltrout.com</a><br></blockquote>')
it "autolinks without the beginning of a line" do
raw = plaintext("my site: https://www.eviltrout.com")
expect(cook(raw)).to eq('my site: <a href="https://www.eviltrout.com">https://www.eviltrout.com</a><br>')
end
it "autolinks without the end of a line" do
raw = plaintext("https://www.eviltrout.com is my site")
expect(cook(raw)).to eq('<a href="https://www.eviltrout.com">https://www.eviltrout.com</a> is my site<br>')
end
it "links even within a quote" do
raw = plaintext("> https://www.eviltrout.com is my site")
expect(cook(raw)).to eq('<blockquote><a href="https://www.eviltrout.com">https://www.eviltrout.com</a> is my site<br></blockquote>')
end
# Raw with all three sections: Markdown markers in the plaintext and elided
# sections must come out literally, and the attachments HTML must be kept as-is.
it "works and does not interpret Markdown in plaintext and elided" do
  long = <<~LONG_EMAIL
    [plaintext]
    *Lorem ipsum* dolor sit amet, consectetur adipiscing elit.
    [/plaintext]
    [attachments]
    <img src='some_image.png' width='100' height='100'>
    [/attachments]
    [elided]
    At vero eos *et accusam* et justo duo dolores et ea rebum.
    [/elided]
  LONG_EMAIL
  long_cooked = <<~LONG_COOKED
    *Lorem ipsum* dolor sit amet, consectetur adipiscing elit.<br>
    <br><img src='some_image.png' width='100' height='100'>
    <br><br>
    <details class='elided'>
    <summary title='Show trimmed content'>&#183;&#183;&#183;</summary>
    At vero eos *et accusam* et justo duo dolores et ea rebum.<br>
    </details>
  LONG_COOKED
  expect(cook(long)).to eq(long_cooked)
end
# Raw with a [plaintext] and an [elided] section but no [attachments] section:
# the elided part must still be rendered inside the <details> wrapper.
it "works without attachments" do
long = <<~LONG_EMAIL
[plaintext]
*Lorem ipsum* dolor sit amet, consectetur adipiscing elit.
[/plaintext]
[elided]
At vero eos *et accusam* et justo duo dolores et ea rebum.
[/elided]
LONG_EMAIL
long_cooked = <<~LONG_COOKED
*Lorem ipsum* dolor sit amet, consectetur adipiscing elit.<br>
<br><br>
<details class='elided'>
<summary title='Show trimmed content'>&#183;&#183;&#183;</summary>
At vero eos *et accusam* et justo duo dolores et ea rebum.<br>
</details>
LONG_COOKED
expect(cook(long)).to eq(long_cooked)
end
# Spec helper: runs raw through a fresh EmailCook and returns the cooked result.
def cook(raw)
  cooker = EmailCook.new(raw)
  cooker.cook
end
# Spec helper: wraps text in the [plaintext] / [/plaintext] marker lines,
# each separated from the text by a single newline.
def plaintext(text)
  opening = "[plaintext]"
  closing = "[/plaintext]"
  "#{opening}\n#{text}\n#{closing}"
end
end

View File

@ -38,11 +38,16 @@ describe PostAnalyzer do
expect(cooked).to eq('Hello <div/> world')
end
it "does not interpret Markdown when cook_method is 'email'" do
cooked = post_analyzer.cook('*this is not italic* and here is a link: https://www.example.com', cook_method: Post.cook_methods[:email])
it "does not interpret Markdown when cook_method is 'email' and raw contains plaintext" do
cooked = post_analyzer.cook("[plaintext]\n*this is not italic* and here is a link: https://www.example.com\n[/plaintext]", cook_method: Post.cook_methods[:email])
expect(cooked).to eq('*this is not italic* and here is a link: <a href="https://www.example.com">https://www.example.com</a>')
end
# With the :email cook method but no [plaintext] wrapper in raw, Markdown is still rendered.
it "does interpret Markdown when cook_method is 'email' and raw does not contain plaintext" do
  email_method = Post.cook_methods[:email]
  html = post_analyzer.cook('*this is italic*', cook_method: email_method)
  expect(html).to eq('<p><em>this is italic</em></p>')
end
it "does interpret Markdown when cook_method is 'regular'" do
cooked = post_analyzer.cook('*this is italic*', cook_method: Post.cook_methods[:regular])
expect(cooked).to eq('<p><em>this is italic</em></p>')