2019-05-30 02:38:46 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
require_dependency "pretty_text"
|
|
|
|
|
|
|
|
# Rewrites references to uploads found in raw markdown (BBCode images,
# Markdown images/links, reference definitions, HTML <img>/<a> tags and bare
# URLs) so that they point at the upload's short URL or short path.
class InlineUploads
  # Token put in a replacement template where +Upload#short_url+ goes.
  PLACEHOLDER = "__replace__"

  # Token put in a replacement template where +Upload#short_path+ goes.
  PATH_PLACEHOLDER = "__replace_path__"

  # Regexp source (kept as a String so it can be interpolated) for the
  # "/original/<digit>X/.../<40 hex chars><extension>" part of an upload URL.
  UPLOAD_REGEXP_PATTERN = "/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9.]*)"
  private_constant :UPLOAD_REGEXP_PATTERN

  # Returns a copy of +markdown+ in which recognised upload references have
  # been replaced by their short form.
  #
  # The markdown is cooked once and the upload links seen in the cooked HTML
  # are collected in document order. The raw markdown is then scanned for
  # candidate references; candidates are sorted by position and consumed in
  # lock-step with the cooked occurrences. A candidate is rewritten only when
  # its paired occurrence is valid, its link contains the cooked link and
  # +Upload.get_from_url+ finds an upload for it.
  #
  # @param markdown [String] raw post text; it is not mutated
  # @param on_missing [#call, nil] called with the link when no Upload is found
  # @return [String] the rewritten markdown
  def self.process(markdown, on_missing: nil)
    markdown = markdown.dup
    link_occurrences = cooked_link_occurrences(markdown)

    raw_upload_matches(markdown)
      .sort { |a, b| a[3] <=> b[3] }
      .each do |match, link, replace_with, _index|
        node_info = link_occurrences.shift
        next unless node_info&.dig(:is_valid)
        next unless link.include?(node_info[:link])

        # With a local store, links that name a different host are left alone.
        unless Discourse.store.external?
          uri =
            begin
              URI(link)
            rescue URI::Error
              nil
            end
          next if uri&.host && uri.host != Discourse.current_hostname
        end

        upload = Upload.get_from_url(link)

        if upload
          # Block form of sub!: the replacement is inserted literally, so a
          # backslash in it (e.g. from alt text) is never read as a
          # back-reference.
          replace_with.sub!(PLACEHOLDER) { upload.short_url }
          replace_with.sub!(PATH_PLACEHOLDER) { upload.short_path }
          markdown.sub!(match) { replace_with }
        elsif on_missing
          on_missing.call(link)
        end
      end

    markdown
  end

  # Yields (match, src, replacement, index) for every inline Markdown link or
  # image whose target matches an upload URL, or for every one when
  # +external_src+ is true. Does nothing when no block is given.
  def self.match_md_inline_img(markdown, external_src: false)
    markdown.scan(/(!?\[([^\[\]]*)\]\(([a-zA-Z0-9_\.\/:-]+)([ ]*['"]{1}[^\)]*['"]{1}[ ]*)?\))/) do |match|
      index = Regexp.last_match.begin(0)
      next unless block_given? && (external_src || matched_uploads(match[2]).present?)

      bang = match[0].start_with?("!") ? "!" : ""
      yield(match[0], match[2], +"#{bang}[#{match[1]}](#{PLACEHOLDER}#{match[3]})", index)
    end
  end

  # Yields (match, src, replacement, index) for every BBCode [img]...[/img]
  # tag. The source is matched lazily so that several tags on one line are
  # reported one by one.
  def self.match_bbcode_img(markdown)
    markdown.scan(/(\[img\]\s?(.+?)\s?\[\/img\])/) do |match|
      index = Regexp.last_match.begin(0)
      yield(match[0], match[1], +"![](#{PLACEHOLDER})", index) if block_given?
    end
  end

  # Yields (match, src, replacement, index) for every Markdown reference
  # definition ("[label]: url") whose URL matches an upload URL.
  def self.match_md_reference(markdown)
    markdown.scan(/(\[([^\]]+)\]:([ ]+)(\S+))/) do |match|
      index = Regexp.last_match.begin(0)
      next unless match[3] && matched_uploads(match[3]).present? && block_given?

      yield(
        match[0],
        match[3],
        +"[#{match[1]}]:#{match[2]}#{Discourse.base_url}#{PATH_PLACEHOLDER}",
        index
      )
    end
  end

  # Yields (match, href, replacement, index) for every HTML anchor whose href
  # matches an upload URL, or for every anchor with an href when
  # +external_href+ is true. The replacement is a Markdown link; its text gets
  # an "|attachment" suffix when the anchor carries a class attribute.
  def self.match_anchor(markdown, external_href: false)
    markdown.scan(/((<a[^<]+>)([^<>]*?)<\/a>)/) do |match|
      # Read the match offset first: later regexp calls in this block would
      # overwrite the last-match data.
      index = Regexp.last_match.begin(0)
      node = Nokogiri::HTML::fragment(match[0]).children[0]
      href = node.attributes["href"]&.value
      next unless href && (external_href || matched_uploads(href).present?)

      # Collapse the anchor text onto one line with single spaces.
      text = match[2].strip.delete("\n").squeeze(" ")
      text = "#{text}|attachment" if node.attributes["class"]&.value

      yield(match[0], href, +"[#{text}](#{PLACEHOLDER})", index) if block_given?
    end
  end

  # Yields (match, src, replacement, index) for every HTML <img> tag whose src
  # matches an upload URL, or for every <img> with a src when +external_src+
  # is true. The replacement is a Markdown image carrying the alt text,
  # "|WIDTHxHEIGHT" when both dimensions are present, and the title.
  # Tags without a src attribute are skipped.
  def self.match_img(markdown, external_src: false)
    markdown.scan(/(<(?!img)[^<>]+\/?>)?(\n*)(([ ]*)<img ([^<>]+)>([ ]*))(\n*)/) do |match|
      index = Regexp.last_match.begin(0)
      node = Nokogiri::HTML::fragment(match[2].strip).children[0]
      next if node.nil?

      attrs = node.attributes
      src = attrs["src"]&.value
      next if src.nil?
      next unless external_src || matched_uploads(src).present?

      text = attrs["alt"]&.value
      width = attrs["width"]&.value
      height = attrs["height"]&.value
      title = attrs["title"]&.value
      text = "#{text}|#{width}x#{height}" if width && height

      # match[0] is the HTML tag (if any) immediately preceding the image.
      after_html_tag = match[0].present?
      after_non_self_closing_tag = after_html_tag && !match[0].end_with?("/>")

      spaces_before =
        if after_non_self_closing_tag
          match[3].present? ? match[3] : " "
        else
          ""
        end

      title_suffix = title.present? ? " \"#{title}\"" : ""
      replacement = +"#{spaces_before}![#{text}](#{PLACEHOLDER}#{title_suffix})"

      # Pad with newlines so that at least two separate the image from the
      # preceding tag (and from what follows, after a non-self-closing tag).
      if after_html_tag && match[1].length <= 1
        replacement.prepend(match[1].empty? ? "\n\n" : "\n")
      end

      if after_non_self_closing_tag && match[6].length <= 1
        replacement << (match[6].empty? ? "\n\n" : "\n")
      end

      match[2].strip! unless after_html_tag

      yield(match[2], src, replacement, index) if block_given?
    end
  end

  # Returns every substring of +node+ (converted with #to_s) that looks like
  # an upload URL: "upload://" short URLs, "/uploads/short-url/" paths,
  # local "/uploads/<db>/original/..." paths and, when the store is external,
  # URLs under the S3 base URL or S3 CDN URL.
  def self.matched_uploads(node)
    # Settings are escaped so that "." in a host name only matches a dot.
    db = Regexp.escape(RailsMultisite::ConnectionManagement.current_db.to_s)

    regexps = [
      /(upload:\/\/([a-zA-Z0-9]+)[a-z0-9\.]*)/,
      /(\/uploads\/short-url\/([a-zA-Z0-9]+)[a-z0-9\.]*)/,
    ]

    local_regexp = /(\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/

    if Discourse.store.external?
      s3_base = Regexp.escape(SiteSetting.Upload.s3_base_url.to_s)
      s3_cdn = Regexp.escape(SiteSetting.Upload.s3_cdn_url.to_s)

      if Rails.configuration.multisite
        regexps << /(#{s3_base}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/
        regexps << /(#{s3_cdn}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/
      else
        regexps << /(#{s3_base}#{UPLOAD_REGEXP_PATTERN})/
        regexps << /(#{s3_cdn}#{UPLOAD_REGEXP_PATTERN})/
        regexps << local_regexp
      end
    else
      regexps << local_regexp
    end

    text = node.to_s
    regexps.flat_map { |regexp| text.scan(regexp).map(&:first) }
  end

  # Cooks +markdown+ and returns, in traversal order, one hash per node of the
  # cooked HTML that contains an upload link. A node is considered when it is
  # an <img>, or when it has exactly one child that is not an <img> and has no
  # children of its own. Each hash is { link:, is_valid: }; is_valid is false
  # when the node has neither an href nor a src attribute.
  def self.cooked_link_occurrences(markdown)
    cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown, disable_emojis: true))
    occurrences = []

    cooked_fragment.traverse do |node|
      unless node.name == "img"
        wraps_single_leaf =
          node.children.count == 1 &&
            node.children[0].name != "img" &&
            node.children[0].children.blank?
        next unless wraps_single_leaf
      end

      seen_link = matched_uploads(node).first
      next unless seen_link

      actual_link = node.attributes["href"]&.value || node.attributes["src"]&.value

      occurrences <<
        if actual_link
          { link: actual_link, is_valid: true }
        else
          { link: seen_link, is_valid: false }
        end
    end

    occurrences
  end

  # Collects [match, link, replacement, index] tuples for every candidate
  # upload reference in +markdown+, in the order the matchers are run.
  def self.raw_upload_matches(markdown)
    matches = []
    collect = proc { |*found| matches << found }

    match_bbcode_img(markdown, &collect)
    match_md_inline_img(markdown, &collect)
    match_md_reference(markdown, &collect)
    match_img(markdown, &collect)
    match_anchor(markdown, &collect)

    matches.concat(bare_url_matches(markdown))
  end

  # Regexps matching a full upload URL written directly in the text.
  def self.bare_url_regexps
    db = Regexp.escape(RailsMultisite::ConnectionManagement.current_db.to_s)

    regexps = [
      /(https?:\/\/[a-zA-Z0-9\.\/-]+\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/,
    ]

    if Discourse.store.external?
      s3_base = Regexp.escape(SiteSetting.Upload.s3_base_url.to_s)
      s3_cdn = Regexp.escape(SiteSetting.Upload.s3_cdn_url.to_s)

      regexps << /(https?:#{s3_base}#{UPLOAD_REGEXP_PATTERN})/
      regexps << /(#{s3_cdn}#{UPLOAD_REGEXP_PATTERN})/
    end

    regexps
  end

  # Collects [match, link, replacement, index] tuples for bare upload URLs.
  # For each regexp, a set of already-handled URL offsets prevents the same
  # URL from being reported by more than one of the scans below.
  def self.bare_url_matches(markdown)
    matches = []

    bare_url_regexps.each do |regexp|
      indexes = Set.new

      # URL ending a line at the very start of the text or after a blank
      # line: rewritten as a Markdown image.
      markdown.scan(/(\n{2,}|\A)#{regexp}$/) do |match|
        index = Regexp.last_match.begin(2)
        next unless match[1].present?

        indexes << index
        matches << [match[1], match[1], +"![](#{PLACEHOLDER})", index]
      end

      # URL at the start of a line followed by whitespace: rewritten as a
      # base-URL-prefixed short path.
      markdown.scan(/^#{regexp}(\s)/) do |match|
        index = Regexp.last_match.begin(0)
        next unless match[0].present?
        next unless indexes.add?(index)

        matches << [match[0], match[0], +"#{Discourse.base_url}#{PATH_PLACEHOLDER}", index]
      end

      # URLs inside "[label]: url" reference definitions are only recorded as
      # handled here; match_md_reference reports them.
      markdown.scan(/\[[^\[\]]*\]: #{regexp}/) do |match|
        index = Regexp.last_match.begin(1)
        indexes << index if match[0].present?
      end

      # Any other URL preceded by whitespace.
      markdown.scan(/((\n|\s)+)#{regexp}/) do |match|
        url_index = Regexp.last_match.begin(3)
        match_index = Regexp.last_match.begin(0)
        next unless matched_uploads(match[2]).present?
        next if indexes.include?(url_index)

        matches << [
          match[2],
          match[2],
          +"#{Discourse.base_url}#{PATH_PLACEHOLDER}",
          match_index
        ]
      end
    end

    matches
  end

  private_class_method :matched_uploads,
                       :cooked_link_occurrences,
                       :raw_upload_matches,
                       :bare_url_regexps,
                       :bare_url_matches
end
|