Merge pull request #5400 from gschlager/plaintext
FEATURE: convert plain text emails to markdown
This commit is contained in:
commit
7c659ece1e
|
@ -2,6 +2,7 @@ require "digest"
|
||||||
require_dependency "new_post_manager"
|
require_dependency "new_post_manager"
|
||||||
require_dependency "post_action_creator"
|
require_dependency "post_action_creator"
|
||||||
require_dependency "html_to_markdown"
|
require_dependency "html_to_markdown"
|
||||||
|
require_dependency "plain_text_to_markdown"
|
||||||
require_dependency "upload_creator"
|
require_dependency "upload_creator"
|
||||||
|
|
||||||
module Email
|
module Email
|
||||||
|
@ -43,12 +44,13 @@ module Email
|
||||||
markdown: 2)
|
markdown: 2)
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(mail_string)
|
def initialize(mail_string, opts = {})
|
||||||
raise EmptyEmailError if mail_string.blank?
|
raise EmptyEmailError if mail_string.blank?
|
||||||
@staged_users = []
|
@staged_users = []
|
||||||
@raw_email = try_to_encode(mail_string, "UTF-8") || try_to_encode(mail_string, "ISO-8859-1") || mail_string
|
@raw_email = try_to_encode(mail_string, "UTF-8") || try_to_encode(mail_string, "ISO-8859-1") || mail_string
|
||||||
@mail = Mail.new(@raw_email)
|
@mail = Mail.new(@raw_email)
|
||||||
@message_id = @mail.message_id.presence || Digest::MD5.hexdigest(mail_string)
|
@message_id = @mail.message_id.presence || Digest::MD5.hexdigest(mail_string)
|
||||||
|
@opts = opts
|
||||||
end
|
end
|
||||||
|
|
||||||
def process!
|
def process!
|
||||||
|
@ -222,19 +224,32 @@ module Email
|
||||||
def select_body
|
def select_body
|
||||||
text = nil
|
text = nil
|
||||||
html = nil
|
html = nil
|
||||||
|
text_content_type = nil
|
||||||
|
|
||||||
if @mail.multipart?
|
if @mail.multipart?
|
||||||
text = fix_charset(@mail.text_part)
|
text = fix_charset(@mail.text_part)
|
||||||
html = fix_charset(@mail.html_part)
|
html = fix_charset(@mail.html_part)
|
||||||
|
text_content_type = @mail.text_part&.content_type
|
||||||
elsif @mail.content_type.to_s["text/html"]
|
elsif @mail.content_type.to_s["text/html"]
|
||||||
html = fix_charset(@mail)
|
html = fix_charset(@mail)
|
||||||
else
|
else
|
||||||
text = fix_charset(@mail)
|
text = fix_charset(@mail)
|
||||||
|
text_content_type = @mail.content_type
|
||||||
end
|
end
|
||||||
|
|
||||||
text, elided_text = if text.present?
|
if text.present?
|
||||||
text = trim_discourse_markers(text)
|
text = trim_discourse_markers(text)
|
||||||
EmailReplyTrimmer.trim(text, true)
|
text, elided_text = EmailReplyTrimmer.trim(text, true)
|
||||||
|
|
||||||
|
if @opts[:convert_plaintext] || sent_to_mailinglist_mirror?
|
||||||
|
text_content_type ||= ""
|
||||||
|
converter_opts = {
|
||||||
|
format_flowed: !!(text_content_type =~ /format\s*=\s*["']?flowed["']?/i),
|
||||||
|
delete_flowed_space: !!(text_content_type =~ /DelSp\s*=\s*["']?yes["']?/i)
|
||||||
|
}
|
||||||
|
text = PlainTextToMarkdown.new(text, converter_opts).to_markdown
|
||||||
|
elided_text = PlainTextToMarkdown.new(elided_text, converter_opts).to_markdown
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
markdown, elided_markdown = if html.present?
|
markdown, elided_markdown = if html.present?
|
||||||
|
@ -755,8 +770,8 @@ module Email
|
||||||
|
|
||||||
def self.elided_html(elided)
|
def self.elided_html(elided)
|
||||||
html = "\n\n" << "<details class='elided'>" << "\n"
|
html = "\n\n" << "<details class='elided'>" << "\n"
|
||||||
html << "<summary title='#{I18n.t('emails.incoming.show_trimmed_content')}'>···</summary>" << "\n"
|
html << "<summary title='#{I18n.t('emails.incoming.show_trimmed_content')}'>···</summary>" << "\n\n"
|
||||||
html << elided << "\n"
|
html << elided << "\n\n"
|
||||||
html << "</details>" << "\n"
|
html << "</details>" << "\n"
|
||||||
html
|
html
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,190 @@
|
||||||
|
# Converts the plain text body of an email into Markdown.
#
# The conversion
# * keeps the quote level of lines starting with ">" characters,
# * joins soft-wrapped lines when the text is "format=flowed",
# * leaves fenced code blocks (```) untouched,
# * collapses a URL that is immediately followed by a copy of itself,
# * escapes Markdown and HTML special characters outside of URLs and
# * keeps indentation by replacing it with non-breaking spaces.
class PlainTextToMarkdown
  SIGNATURE_SEPARATOR ||= "-- ".freeze

  URL_REGEX ||= /((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/i

  # Written as an escape sequence so the character cannot be silently
  # replaced by an ordinary space when the file is edited or copied.
  NON_BREAKING_SPACE ||= "\u00A0".freeze

  # @param plaintext [String, nil] the text to convert; nil is treated as ""
  # @param opts [Hash]
  # @option opts [Boolean] :format_flowed join lines that end with a space
  # @option opts [Boolean] :delete_flowed_space drop that trailing space when joining
  def initialize(plaintext, opts = {})
    @plaintext = plaintext.to_s
    @lines = []

    @format_flowed = opts[:format_flowed] || false
    @delete_flowed_space = opts[:delete_flowed_space] || false
  end

  # @return [String] the Markdown representation of the plain text
  def to_markdown
    prepare_lines
    classify_lines

    markdown = +""
    last_quote_level = 0
    last_line_blank = false

    @lines.each do |line|
      current_line_blank = blank_text?(line.text)

      # consecutive blank lines are collapsed into a single one
      unless last_line_blank && current_line_blank
        if line.quote_level > 0
          quote_identifiers = ">" * line.quote_level
          # a decreasing quote level needs an empty quoted line in between,
          # otherwise the text would continue the deeper quote
          markdown << quote_identifiers << "\n" unless line.quote_level >= last_quote_level || current_line_blank
          markdown << quote_identifiers
          markdown << " " unless current_line_blank
        else
          # separate unquoted text from the quote that precedes it
          markdown << "\n" unless last_quote_level == 0 || current_line_blank
        end

        markdown << convert_text(line)
        markdown << "\n"
      end

      last_line_blank = current_line_blank
      last_quote_level = line.quote_level
    end

    markdown.rstrip!
    markdown
  end

  private

  # A fenced code block; it is only valid once the closing fence was found.
  class CodeBlock < Struct.new(:start_line, :end_line)
    def initialize(start_line, end_line = nil)
      super
    end

    def valid?
      !start_line.nil? && !end_line.nil?
    end
  end

  # A single line of text together with its quote level and the code block
  # (if any) it belongs to.
  class Line < Struct.new(:text, :quote_level, :code_block)
    def initialize(text, quote_level = 0, code_block = nil)
      super
    end

    def valid_code_block?
      code_block&.valid?
    end
  end

  # Splits the plain text into Line objects and, for flowed text, joins
  # soft-wrapped lines.
  def prepare_lines
    # start from scratch so that #to_markdown can be called more than once
    @lines = []
    previous_line = nil

    @plaintext.each_line do |raw_text|
      line = Line.new(raw_text.chomp)

      remove_quote_level_indicators!(line)

      if @format_flowed
        line = merge_lines(line, previous_line)
        # merge_lines returns the very same object as previous_line when it
        # merged the text; compare by identity, because Struct#== compares
        # by value and would drop a line that merely repeats the previous one
        @lines << line unless line.equal?(previous_line)
      else
        @lines << line
      end

      previous_line = line
    end
  end

  # Assigns code blocks to all lines.
  def classify_lines
    previous_line = nil

    @lines.each do |line|
      classify_line_as_code!(line, previous_line)

      previous_line = line
    end
  end

  # Strips leading ">" characters (and one following whitespace) from the
  # line and stores their count as quote level.
  #
  # @param line [Line]
  def remove_quote_level_indicators!(line)
    match_data = line.text.match(/^(?<indicators>>+)\s?(?<text>.*)/)

    if match_data
      line.text = match_data[:text]
      line.quote_level = match_data[:indicators].length
    end
  end

  # Appends the text of `line` to `previous_line` when the previous line is
  # soft-wrapped, i.e. it ends with a space and has the same quote level.
  # Blank lines and the signature separator are never merged.
  #
  # @param line [Line]
  # @param previous_line [Line]
  # @return [Line] `previous_line` when the lines were merged, otherwise `line`
  def merge_lines(line, previous_line)
    return line if previous_line.nil? || blank_text?(line.text)
    return line if line.text == SIGNATURE_SEPARATOR || previous_line.text == SIGNATURE_SEPARATOR
    return line unless line.quote_level == previous_line.quote_level && previous_line.text.end_with?(" ")

    previous_line.text = previous_line.text[0...-1] if @delete_flowed_space
    previous_line.text += line.text
    previous_line
  end

  # Lets the line inherit the still open code block of the previous line and
  # opens or closes a code block when the line is a fence (```).
  #
  # @param line [Line]
  # @param previous_line [Line]
  def classify_line_as_code!(line, previous_line)
    line.code_block = previous_line.code_block unless previous_line.nil? || previous_line.valid_code_block?
    return unless line.text =~ /^\s{0,3}```/

    if line.code_block
      line.code_block.end_line = line
    else
      line.code_block = CodeBlock.new(line)
    end
  end

  # @param line [Line]
  # @return [String]
  def convert_text(line)
    text = line.text

    if line.valid_code_block?
      code_block = line.code_block
      # only the fences lose their indentation, the code itself stays as it is
      is_fence = code_block.start_line.equal?(line) || code_block.end_line.equal?(line)
      return is_fence ? text.lstrip : text
    end

    converted_text = replace_duplicate_links(text)
    converted_text = escape_special_characters(converted_text)
    converted_text = indent_with_non_breaking_spaces(converted_text)
    converted_text
  end

  # Replaces a URL that is directly followed by the same URL (optionally
  # wrapped in brackets or quotes) with a single occurrence of the URL.
  #
  # @param text [String]
  # @return [String]
  def replace_duplicate_links(text)
    urls = text.scan(URL_REGEX).flatten
    duplicate_urls = urls.select { |url| urls.count(url) > 1 }.uniq

    duplicate_urls.reduce(text) do |result, url|
      # the URL is data, not a pattern: "?", "+", "[" etc. must match literally
      escaped_url = Regexp.escape(url)
      pattern = /#{escaped_url}(\s*[()\[\]<>«»'"“”‘’]?#{escaped_url}[()\[\]<>«»'"“”‘’]?)/i

      # block form, so that backslashes within the URL are not interpreted
      # as back-references of the replacement
      result.gsub(pattern) { url }
    end
  end

  # Replaces leading whitespace with non-breaking spaces so that the
  # indentation is kept. A single leading space is left untouched.
  #
  # @param text [String]
  # @return [String]
  def indent_with_non_breaking_spaces(text)
    text.sub(/^\s+/) do |indentation|
      # replace tabs with 2 spaces
      expanded = indentation.gsub("\t", "  ")

      # replace indentation with non-breaking spaces
      expanded.length > 1 ? NON_BREAKING_SPACE * expanded.length : expanded
    end
  end

  # Escapes Markdown and HTML special characters everywhere except in URLs.
  #
  # @param text [String]
  # @return [String]
  def escape_special_characters(text)
    escaped_text = +""

    # URL_REGEX has a capture group, so the URLs are part of the result
    text.split(URL_REGEX).each do |text_part|
      if text_part =~ URL_REGEX
        # no escaping within URLs
        escaped_text << text_part
      else
        # escape Markdown and HTML
        escaped_part = text_part.gsub(/[\\`*_{}\[\]()#+\-.!~]/) { |c| "\\#{c}" }
        escaped_text << CGI.escapeHTML(escaped_part)
      end
    end

    escaped_text
  end

  # @param text [String, nil]
  # @return [Boolean] true if the text is nil, empty or consists of whitespace only
  def blank_text?(text)
    text.nil? || text !~ /[^[:space:]]/
  end
end
|
|
@ -104,30 +104,19 @@ module ImportScripts::Mbox
|
||||||
id: row['msg_id'],
|
id: row['msg_id'],
|
||||||
user_id: user_id,
|
user_id: user_id,
|
||||||
created_at: to_time(row['email_date']),
|
created_at: to_time(row['email_date']),
|
||||||
raw: format_raw(row['body'], attachment_html, row['elided'], row['format']),
|
raw: format_raw(row['body'], attachment_html, row['elided']),
|
||||||
raw_email: row['raw_message'],
|
raw_email: row['raw_message'],
|
||||||
via_email: true,
|
via_email: true,
|
||||||
cook_method: Post.cook_methods[:email],
|
|
||||||
post_create_action: proc do |post|
|
post_create_action: proc do |post|
|
||||||
create_incoming_email(post, row)
|
create_incoming_email(post, row)
|
||||||
end
|
end
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def format_raw(email_body, attachment_html, elided, format)
|
def format_raw(email_body, attachment_html, elided)
|
||||||
email_body ||= ''
|
body = email_body || ''
|
||||||
|
body << attachment_html if attachment_html.present?
|
||||||
case format
|
body << Email::Receiver.elided_html(elided) if elided.present?
|
||||||
when Email::Receiver::formats[:markdown]
|
|
||||||
body = email_body
|
|
||||||
body << attachment_html if attachment_html.present?
|
|
||||||
body << Email::Receiver.elided_html(elided) if elided.present?
|
|
||||||
when Email::Receiver::formats[:plaintext]
|
|
||||||
body = %|[plaintext]\n#{escape_tags(email_body)}\n[/plaintext]|
|
|
||||||
body << %|\n[attachments]\n#{escape_tags(attachment_html)}\n[/attachments]| if attachment_html.present?
|
|
||||||
body << %|\n[elided]\n#{escape_tags(elided)}\n[/elided]| if elided.present?
|
|
||||||
end
|
|
||||||
|
|
||||||
body
|
body
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -163,7 +163,7 @@ module ImportScripts::Mbox
|
||||||
end
|
end
|
||||||
|
|
||||||
def read_mail_from_string(raw_message)
|
def read_mail_from_string(raw_message)
|
||||||
Email::Receiver.new(raw_message) unless raw_message.blank?
|
Email::Receiver.new(raw_message, convert_plaintext: true) unless raw_message.blank?
|
||||||
end
|
end
|
||||||
|
|
||||||
def extract_reply_message_ids(mail)
|
def extract_reply_message_ids(mail)
|
||||||
|
|
|
@ -344,7 +344,7 @@ describe Email::Receiver do
|
||||||
topic.save
|
topic.save
|
||||||
|
|
||||||
expect { process(:original_message) }.to change { topic.posts.count }
|
expect { process(:original_message) }.to change { topic.posts.count }
|
||||||
expect(topic.posts.last.raw).to eq("This is a reply :)\n\n<details class='elided'>\n<summary title='Show trimmed content'>···</summary>\n---Original Message---\nThis part should not be included\n</details>")
|
expect(topic.posts.last.raw).to eq("This is a reply :)\n\n<details class='elided'>\n<summary title='Show trimmed content'>···</summary>\n\n---Original Message---\nThis part should not be included\n\n</details>")
|
||||||
end
|
end
|
||||||
|
|
||||||
it "doesn't include the 'elided' part of the original message when always_show_trimmed_content is disabled" do
|
it "doesn't include the 'elided' part of the original message when always_show_trimmed_content is disabled" do
|
||||||
|
@ -356,7 +356,7 @@ describe Email::Receiver do
|
||||||
it "adds the 'elided' part of the original message for public replies when always_show_trimmed_content is enabled" do
|
it "adds the 'elided' part of the original message for public replies when always_show_trimmed_content is enabled" do
|
||||||
SiteSetting.always_show_trimmed_content = true
|
SiteSetting.always_show_trimmed_content = true
|
||||||
expect { process(:original_message) }.to change { topic.posts.count }.from(1).to(2)
|
expect { process(:original_message) }.to change { topic.posts.count }.from(1).to(2)
|
||||||
expect(topic.posts.last.raw).to eq("This is a reply :)\n\n<details class='elided'>\n<summary title='Show trimmed content'>···</summary>\n---Original Message---\nThis part should not be included\n</details>")
|
expect(topic.posts.last.raw).to eq("This is a reply :)\n\n<details class='elided'>\n<summary title='Show trimmed content'>···</summary>\n\n---Original Message---\nThis part should not be included\n\n</details>")
|
||||||
end
|
end
|
||||||
|
|
||||||
it "supports attached images in TEXT part" do
|
it "supports attached images in TEXT part" do
|
||||||
|
|
|
@ -146,7 +146,9 @@ describe EmailCook do
|
||||||
|
|
||||||
<details class='elided'>
|
<details class='elided'>
|
||||||
<summary title='Show trimmed content'>···</summary>
|
<summary title='Show trimmed content'>···</summary>
|
||||||
|
|
||||||
At vero eos *et accusam* et justo duo dolores et ea rebum.<br>
|
At vero eos *et accusam* et justo duo dolores et ea rebum.<br>
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
LONG_COOKED
|
LONG_COOKED
|
||||||
|
|
||||||
|
@ -169,7 +171,9 @@ describe EmailCook do
|
||||||
|
|
||||||
<details class='elided'>
|
<details class='elided'>
|
||||||
<summary title='Show trimmed content'>···</summary>
|
<summary title='Show trimmed content'>···</summary>
|
||||||
|
|
||||||
At vero eos *et accusam* et justo duo dolores et ea rebum.<br>
|
At vero eos *et accusam* et justo duo dolores et ea rebum.<br>
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
LONG_COOKED
|
LONG_COOKED
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,197 @@
|
||||||
|
require 'rails_helper'
|
||||||
|
require 'plain_text_to_markdown'
|
||||||
|
|
||||||
|
describe PlainTextToMarkdown do
|
||||||
|
def to_markdown(text, opts = {})
|
||||||
|
PlainTextToMarkdown.new(text, opts).to_markdown
|
||||||
|
end
|
||||||
|
|
||||||
|
let(:nbsp) { " " }
|
||||||
|
|
||||||
|
context "quotes" do
|
||||||
|
it "uses the correct quote level" do
|
||||||
|
expect(to_markdown("> foo")).to eq("> foo")
|
||||||
|
expect(to_markdown(">>> foo")).to eq(">>> foo")
|
||||||
|
expect(to_markdown(">>>>>>> foo")).to eq(">>>>>>> foo")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "ignores the first whitespace after the quote identifier" do
|
||||||
|
expect(to_markdown(">foo")).to eq("> foo")
|
||||||
|
expect(to_markdown("> foo")).to eq("> foo")
|
||||||
|
expect(to_markdown(">\tfoo")).to eq("> foo")
|
||||||
|
|
||||||
|
expect(to_markdown("> foo")).to eq("> foo")
|
||||||
|
expect(to_markdown(">\t foo")).to eq("> foo")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "adds a blank line after a quote if it is followed by text" do
|
||||||
|
expect(to_markdown("> foo\nbar")).to eq("> foo\n\nbar")
|
||||||
|
expect(to_markdown(">> foo\nbar")).to eq(">> foo\n\nbar")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "ignores multiple consecutive blank lines" do
|
||||||
|
expect(to_markdown("> foo\n\nbar")).to eq("> foo\n\nbar")
|
||||||
|
expect(to_markdown("> foo\n\n\nbar")).to eq("> foo\n\nbar")
|
||||||
|
expect(to_markdown("> foo\n> \n>\n>\n> bar")).to eq("> foo\n>\n> bar")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "adds an additional line with quote identifier if the quote level is decreasing" do
|
||||||
|
expect(to_markdown(">> foo\n>bar")).to eq(">> foo\n>\n> bar")
|
||||||
|
expect(to_markdown(">>>> foo\n>bar")).to eq(">>>> foo\n>\n> bar")
|
||||||
|
expect(to_markdown(">> foo\nno quote\n>bar")).to eq(">> foo\n\nno quote\n> bar")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not add an additional line with quote identifier if the quote level is decreasing and text is blank" do
|
||||||
|
expect(to_markdown(">>> foo\n>>\n>> bar")).to eq(">>> foo\n>>\n>> bar")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "special characters" do
|
||||||
|
it "escapes special Markdown characters" do
|
||||||
|
expect(to_markdown('\ backslash')).to eq('\\\\ backslash')
|
||||||
|
expect(to_markdown('` backtick')).to eq('\` backtick')
|
||||||
|
expect(to_markdown('* asterisk')).to eq('\* asterisk')
|
||||||
|
expect(to_markdown('_ underscore')).to eq('\_ underscore')
|
||||||
|
expect(to_markdown('{} curly braces')).to eq('\{\} curly braces')
|
||||||
|
expect(to_markdown('[] square brackets')).to eq('\[\] square brackets')
|
||||||
|
expect(to_markdown('() parentheses')).to eq('\(\) parentheses')
|
||||||
|
expect(to_markdown('# hash mark')).to eq('\# hash mark')
|
||||||
|
expect(to_markdown('+ plus sign')).to eq('\+ plus sign')
|
||||||
|
expect(to_markdown('- minus sign')).to eq('\- minus sign')
|
||||||
|
expect(to_markdown('. dot')).to eq('\. dot')
|
||||||
|
expect(to_markdown('! exclamation mark')).to eq('\! exclamation mark')
|
||||||
|
expect(to_markdown('~ tilde')).to eq('\~ tilde')
|
||||||
|
end
|
||||||
|
|
||||||
|
it "escapes special HTML characters" do
|
||||||
|
expect(to_markdown("' single quote")).to eq("' single quote")
|
||||||
|
expect(to_markdown("\" double quote")).to eq("" double quote")
|
||||||
|
expect(to_markdown("& ampersand")).to eq("& ampersand")
|
||||||
|
expect(to_markdown("<> less-than and greater-than sign")).to eq("<> less\\-than and greater\\-than sign")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "escapes special characters but ignores links" do
|
||||||
|
expect(to_markdown("*some text* https://www.example.com/foo.html?a=1&b=0 & <https://www.example.com/bar.html?a=1&b=0> *more text*"))
|
||||||
|
.to eq("\\*some text\\* https://www.example.com/foo.html?a=1&b=0 & <https://www.example.com/bar.html?a=1&b=0> \\*more text\\*")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "indentation" do
|
||||||
|
it "does not replace one leading whitespace" do
|
||||||
|
expect(to_markdown(" foo")).to eq(" foo")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "replaces leading whitespaces with non-breaking spaces" do
|
||||||
|
expect(to_markdown(" foo")).to eq("#{nbsp}#{nbsp}foo")
|
||||||
|
expect(to_markdown(" foo")).to eq("#{nbsp}#{nbsp}#{nbsp}#{nbsp}foo")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "replaces each leading tabs with two non-breaking spaces" do
|
||||||
|
expect(to_markdown("\tfoo")).to eq("#{nbsp}#{nbsp}foo")
|
||||||
|
expect(to_markdown(" \tfoo")).to eq("#{nbsp}#{nbsp}#{nbsp}foo")
|
||||||
|
expect(to_markdown("\t foo")).to eq("#{nbsp}#{nbsp}#{nbsp}foo")
|
||||||
|
expect(to_markdown(" \t foo")).to eq("#{nbsp}#{nbsp}#{nbsp}#{nbsp}foo")
|
||||||
|
expect(to_markdown("\t\tfoo")).to eq("#{nbsp}#{nbsp}#{nbsp}#{nbsp}foo")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "correctly replaces leading whitespaces within quotes" do
|
||||||
|
expect(to_markdown("> foo")).to eq("> foo")
|
||||||
|
expect(to_markdown("> foo")).to eq("> #{nbsp}#{nbsp}foo")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not replace whitespaces within text" do
|
||||||
|
expect(to_markdown("foo bar")).to eq("foo bar")
|
||||||
|
expect(to_markdown("foo\t\tbar")).to eq("foo\t\tbar")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "format=flowed" do
|
||||||
|
it "concats lines ending with a space" do
|
||||||
|
text = "Lorem ipsum dolor sit amet, consectetur \nadipiscing elit. Quasi vero, inquit, \nperpetua oratio rhetorum solum, non \netiam philosophorum sit."
|
||||||
|
markdown = "Lorem ipsum dolor sit amet, consectetur adipiscing elit\\. Quasi vero, inquit, perpetua oratio rhetorum solum, non etiam philosophorum sit\\."
|
||||||
|
|
||||||
|
expect(to_markdown(text, format_flowed: true)).to eq(markdown)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not concat lines when there is an empty line between" do
|
||||||
|
text = "Lorem ipsum dolor sit amet, consectetur \nadipiscing elit. \n\nQuasi vero, inquit, \nperpetua oratio rhetorum solum, non \netiam philosophorum sit."
|
||||||
|
markdown = "Lorem ipsum dolor sit amet, consectetur adipiscing elit\\. \n\nQuasi vero, inquit, perpetua oratio rhetorum solum, non etiam philosophorum sit\\."
|
||||||
|
|
||||||
|
expect(to_markdown(text, format_flowed: true)).to eq(markdown)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "concats quoted lines ending with a space" do
|
||||||
|
text = "> Lorem ipsum dolor sit amet, consectetur \n> adipiscing elit. Quasi vero, inquit, \n> perpetua oratio rhetorum solum, non \n> etiam philosophorum sit."
|
||||||
|
markdown = "> Lorem ipsum dolor sit amet, consectetur adipiscing elit\\. Quasi vero, inquit, perpetua oratio rhetorum solum, non etiam philosophorum sit\\."
|
||||||
|
|
||||||
|
expect(to_markdown(text, format_flowed: true)).to eq(markdown)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not concat quoted lines ending with a space when the quote level differs" do
|
||||||
|
text = "> Lorem ipsum dolor sit amet, consectetur \n> adipiscing elit. \n>> Quasi vero, inquit, \n>> perpetua oratio rhetorum solum, non \n> etiam philosophorum sit."
|
||||||
|
markdown = "> Lorem ipsum dolor sit amet, consectetur adipiscing elit\\. \n>> Quasi vero, inquit, perpetua oratio rhetorum solum, non \n>\n> etiam philosophorum sit\\."
|
||||||
|
|
||||||
|
expect(to_markdown(text, format_flowed: true)).to eq(markdown)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not recognize a signature separator as start of flowed text" do
|
||||||
|
text = "-- \nsignature line 1\nsignature line 2"
|
||||||
|
markdown = "\\-\\- \nsignature line 1\nsignature line 2"
|
||||||
|
|
||||||
|
expect(to_markdown(text, format_flowed: true)).to eq(markdown)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not concat lines when there is a signature separator" do
|
||||||
|
text = "Lorem ipsum \ndolor sit amet \n-- \nsignature line 1\nsignature line 2"
|
||||||
|
markdown = "Lorem ipsum dolor sit amet \n\\-\\- \nsignature line 1\nsignature line 2"
|
||||||
|
|
||||||
|
expect(to_markdown(text, format_flowed: true)).to eq(markdown)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "removes the trailing space if DelSp is set to 'yes'" do
|
||||||
|
text = "Lorem ipsum dolor sit amet, consectetur \nadipiscing elit. \nQuasi vero, inquit"
|
||||||
|
markdown = "Lorem ipsum dolor sit amet, consecteturadipiscing elit\\.Quasi vero, inquit"
|
||||||
|
|
||||||
|
expect(to_markdown(text, format_flowed: true, delete_flowed_space: true)).to eq(markdown)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "links" do
|
||||||
|
it "removes duplicate links" do
|
||||||
|
expect(to_markdown("foo https://www.example.com/foo.html <https://www.example.com/foo.html> bar"))
|
||||||
|
.to eq("foo https://www.example.com/foo.html bar")
|
||||||
|
|
||||||
|
expect(to_markdown("foo https://www.example.com/foo.html (https://www.example.com/foo.html) bar"))
|
||||||
|
.to eq("foo https://www.example.com/foo.html bar")
|
||||||
|
|
||||||
|
expect(to_markdown("foo https://www.example.com/foo.html https://www.example.com/foo.html bar"))
|
||||||
|
.to eq("foo https://www.example.com/foo.html bar")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not removes duplicate links when there is text between the links" do
|
||||||
|
expect(to_markdown("foo https://www.example.com/foo.html bar https://www.example.com/foo.html baz"))
|
||||||
|
.to eq("foo https://www.example.com/foo.html bar https://www.example.com/foo.html baz")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "code" do
|
||||||
|
it "detects matching Markdown code block within backticks" do
|
||||||
|
expect(to_markdown("foo\n```\n<this is code>\n```")).to eq("foo\n```\n<this is code>\n```")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not detect Markdown code block when backticks are not on new line" do
|
||||||
|
expect(to_markdown("foo\n```\n<this is code> ```")).to eq("foo\n\\`\\`\\`\n<this is code> \\`\\`\\`")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not detect Markdown code block when backticks are indented by more than 3 whitespaces" do
|
||||||
|
expect(to_markdown("foo\n ```\n<this is code>\n ```")).to include("<this is code>")
|
||||||
|
expect(to_markdown("foo\n ```\n<this is code>\n ```")).to include("<this is code>")
|
||||||
|
|
||||||
|
expect(to_markdown("foo\n ```\n<this is code>\n```")).to include("<this is code>")
|
||||||
|
expect(to_markdown("foo\n```\n<this is code>\n ```")).to include("<this is code>")
|
||||||
|
|
||||||
|
expect(to_markdown("foo\n ```\n<this is code>\n```")).to include("<this is code>")
|
||||||
|
expect(to_markdown("foo\n```\n<this is code>\n ```")).to include("<this is code>")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
Loading…
Reference in New Issue