DEV: Class that converts MD with old attachment links to new MD.
This commit is contained in:
parent
22abad4151
commit
d93e5fb00d
1
Gemfile
1
Gemfile
|
@ -151,6 +151,7 @@ group :development do
|
|||
gem 'bullet', require: !!ENV['BULLET']
|
||||
gem 'better_errors'
|
||||
gem 'binding_of_caller'
|
||||
gem 'diffy'
|
||||
|
||||
# waiting on 2.7.5 per: https://github.com/ctran/annotate_models/pull/595
|
||||
if rails_master?
|
||||
|
|
|
@ -90,6 +90,7 @@ GEM
|
|||
crass (1.0.4)
|
||||
debug_inspector (0.0.3)
|
||||
diff-lcs (1.3)
|
||||
diffy (3.3.0)
|
||||
discourse-ember-source (3.8.0.1)
|
||||
discourse_image_optim (0.26.2)
|
||||
exifr (~> 1.2, >= 1.2.2)
|
||||
|
@ -435,6 +436,7 @@ DEPENDENCIES
|
|||
certified
|
||||
colored2
|
||||
cppjieba_rb
|
||||
diffy
|
||||
discourse-ember-source (~> 3.8.0)
|
||||
discourse_image_optim
|
||||
email_reply_trimmer (~> 0.1)
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_dependency "pretty_text"
|
||||
|
||||
class InlineUploads
|
||||
# Rewrites upload references in +markdown+ (HTML <a>/<img> tags, bbcode
# [img] tags and inline Markdown images/links) so that they point at the
# upload's short URL using Markdown syntax.
#
# The input is cooked with PrettyText first and only references that also
# show up in the cooked output are rewritten. NOTE(review): presumably this
# is what keeps references inside code spans / code blocks untouched --
# confirm against PrettyText's output.
#
# @param markdown [String] raw post content; it is duplicated, the caller's
#   object is not mutated.
# @param on_missing [#call, nil] optional callable invoked with the link
#   whenever Upload.get_from_url finds no upload for it.
# @return [String] the rewritten copy of +markdown+.
def self.process(markdown, on_missing: nil)
  markdown = markdown.dup
  cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown))

  # Queue of [link, is_valid] pairs in cooked-document order. The raw pass
  # below consumes one entry per match, so the two traversals must visit
  # references in the same order.
  link_occurences = []

  cooked_fragment.traverse do |node|
    # Only <img> nodes, or nodes wrapping exactly one childless non-<img>
    # child, are considered.
    next unless node.name == "img" ||
      (node.children.count == 1 &&
        node.children[0].name != "img" &&
        node.children[0].children.blank?)

    if seen_link = matched_uploads(node).first
      if actual_link = (node.attributes["href"]&.value || node.attributes["src"]&.value)
        link_occurences << [actual_link, true]
      else
        link_occurences << [seen_link, false]
      end
    end
  end

  raw_fragment = Nokogiri::HTML::fragment(markdown)

  raw_fragment.traverse do |node|
    # Only <img> nodes, leaves, or nodes wrapping a single childless child.
    next unless node.name == "img" ||
      node.children.count == 0 ||
      (node.children.count == 1 && node.children[0].children.blank?)

    matches = matched_uploads(node)
    next if matches.blank?
    links = extract_links(node)

    matches.zip(links).each do |_match, link|
      # Always shift, even when the pair is skipped, to stay aligned with
      # the cooked pass.
      seen_link, is_valid = link_occurences.shift
      next unless link && is_valid
      next unless link.include?(seen_link)

      # Best effort: an unparsable link simply has no host to compare.
      uri =
        begin
          URI(link)
        rescue URI::Error
          nil
        end

      # With a non-external store, absolute links to another host are left alone.
      next if !Discourse.store.external? && uri&.host && uri.host != Discourse.current_hostname

      upload = Upload.get_from_url(link)

      unless upload
        on_missing&.call(link)
        next
      end

      # Replacements are passed to sub! as blocks so that backslashes in
      # user-supplied text (alt text, link text) are inserted verbatim
      # instead of being interpreted as back-references.
      case node.name
      when 'a'
        attachment_postfix =
          if node.attributes["class"]&.value&.split(" ")&.include?("attachment")
            "|attachment"
          else
            ""
          end

        text = node.children.text.strip.gsub("\n", "").gsub(/ +/, " ")

        markdown.sub!(node.to_s) { "[#{text}#{attachment_postfix}](#{upload.short_url})" }
      when "img"
        text = node.attributes["alt"]&.value
        width = node.attributes["width"]&.value
        height = node.attributes["height"]&.value
        text = "#{text}|#{width}x#{height}" if width && height

        markdown.sub!(node.to_s) { "![#{text}](#{upload.short_url})" }
      else
        # The link is escaped before interpolation so that `.`, `?`, `+`
        # and friends in a URL are matched literally.
        if (bbcode = markdown.match(/\[img\]\s?#{Regexp.escape(link)}\s?\[\/img\]/))
          markdown.sub!(bbcode[0]) { "![](#{upload.short_url})" }
        # NOTE(review): this pattern is not tied to `link`; it picks the
        # first inline image/link in the document whose target fits the
        # character class. `A-z` also spans `[ \ ] ^ _` and the backtick --
        # looks like a typo for `A-Z`, left unchanged because tightening it
        # would stop matching paths that contain `_`.
        elsif (inline = markdown.match(/(!?\[([a-z0-9|]+)\]\([a-zA-z0-9\.\/]+\))/))
          markdown.sub!(inline[0]) { "![#{inline[2]}](#{upload.short_url})" }
        end
      end
    end
  end

  markdown
end
|
||||
|
||||
# Collects every upload URL reference found in the serialized form of +node+.
#
# Recognised forms are short URLs (`upload://...` and
# `/uploads/short-url/...`) and "original" upload paths. When
# Discourse.store.external? is true the S3 base URL and S3 CDN URL settings
# are accepted as prefixes (under `/uploads/<db>` when multisite is
# enabled); otherwise only the local `/uploads/<db>/original/...` path is.
#
# Settings and the database name are escaped before being interpolated, so
# characters such as `.` in a hostname only match themselves.
#
# @param node [#to_s] a node (or string) whose `to_s` is scanned.
# @return [Array<String>] the full matched references, grouped by pattern in
#   the order the patterns are listed below.
def self.matched_uploads(node)
  db = Regexp.escape(RailsMultisite::ConnectionManagement.current_db.to_s)

  # Builds the pattern for an "original" upload path behind +prefix+.
  # +prefix+ must already be regex-safe.
  original_url = lambda do |prefix|
    /(#{prefix}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/
  end

  regexps = [
    /(upload:\/\/([a-zA-Z0-9]+)[a-z0-9\.]*)/,
    /(\/uploads\/short-url\/([a-zA-Z0-9]+)[a-z0-9\.]*)/,
  ]

  if Discourse.store.external?
    # `to_s` keeps an unset (nil) setting behaving like the empty prefix it
    # produced when interpolated directly.
    base_url = Regexp.escape(SiteSetting.Upload.s3_base_url.to_s)
    cdn_url = Regexp.escape(SiteSetting.Upload.s3_cdn_url.to_s)

    if Rails.configuration.multisite
      regexps << original_url.call("#{base_url}/uploads/#{db}")
      regexps << original_url.call("#{cdn_url}/uploads/#{db}")
    else
      regexps << original_url.call(base_url)
      regexps << original_url.call(cdn_url)
      regexps << original_url.call("/uploads/#{db}")
    end
  else
    regexps << original_url.call("/uploads/#{db}")
  end

  html = node.to_s

  # Each scan result is [full_reference, identifier]; only the former is kept.
  regexps.flat_map { |regexp| html.scan(regexp).map(&:first) }
end
|
||||
private_class_method :matched_uploads
|
||||
|
||||
# Lists the link targets carried by +node+, in this order: its `href`
# attribute, its `src` attribute, the contents of bbcode `[img]...[/img]`
# tags and the targets of inline Markdown images/links found in `node.to_s`.
#
# The bbcode capture is non-greedy so that several `[img]` tags on one line
# yield one link each, without surrounding whitespace.
#
# @param node [#attributes, #to_s] node whose attributes respond to `value`.
# @return [Array<String>] the links found; missing attributes are omitted.
def self.extract_links(node)
  html = node.to_s

  links = [
    node.attributes["href"]&.value,
    node.attributes["src"]&.value,
  ]

  links.concat(html.scan(/\[img\]\s?(.+?)\s?\[\/img\]/))
  # NOTE(review): `A-z` also spans `[ \ ] ^ _` and the backtick; probably a
  # typo for `A-Z`, kept because paths containing `_` rely on it today.
  links.concat(html.scan(/!?\[[a-z0-9|]+\]\(([a-zA-z0-9\.\/]+)\)/))

  # scan returns one-element arrays per match, hence the flatten.
  links.flatten.compact
end
|
||||
private_class_method :extract_links
|
||||
end
|
|
@ -649,3 +649,44 @@ task 'posts:invalidate_broken_images' => :environment do
|
|||
puts
|
||||
puts "", "#{rebaked} posts rebaked!"
|
||||
end
|
||||
|
||||
desc "Converts full upload URLs in `Post#raw` to short upload url"
# Reports which posts InlineUploads.process would rewrite.
#
# Set DRY_RUN to 0/false/no/off to replace the coloured diff of each changed
# post with a progress dot; any other value (or leaving it unset) keeps the
# dry-run output. Nothing in this task saves the rewritten raw in either mode.
task 'posts:inline_uploads' => :environment do
  # `ENV[...] || true` could never be falsy because every ENV value is a
  # non-nil String, so the setting is parsed explicitly.
  dry_run = !%w[0 false no off].include?(ENV["DRY_RUN"].to_s.strip.downcase)

  # Loop-invariant, so configured once up front.
  Diffy::Diff.default_format = :color if dry_run

  scope = Post.joins(:post_uploads)
    .distinct("posts.id")
    .where("raw LIKE '%class=\"attachment%' OR raw LIKE '%<img src=\"%'")

  affected_posts_count = scope.count
  fixed_count = 0
  not_corrected_post_ids = []

  scope.find_each do |post|
    new_raw = InlineUploads.process(post.raw)

    if post.raw == new_raw
      not_corrected_post_ids << post.id
      next
    end

    if dry_run
      puts "Post id #{post.id} raw changed!"
      puts Diffy::Diff.new(post.raw, new_raw, context: 1)
    else
      putc "."
    end

    fixed_count += 1
  end

  puts "#{fixed_count} out of #{affected_posts_count} affected posts corrected"

  if fixed_count != affected_posts_count
    puts "Ids of posts that were not corrected: #{not_corrected_post_ids}"
  end
end
|
||||
|
|
|
@ -24,12 +24,15 @@ end
|
|||
# Fabricates an upload whose URL points at the store's absolute base URL.
# In multisite mode the store's upload path is prepended to the object path.
Fabricator(:upload_s3, from: :upload) do
  url do |attrs|
    sequence(:url) do |n|
      # Unary plus yields an unfrozen string so it can be prepended to below.
      path = +Discourse.store.get_path_for(
        "original", n + 1, attrs[:sha1], ".#{attrs[:extension]}"
      )

      if Rails.configuration.multisite
        path.prepend(File.join(Discourse.store.upload_path, "/"))
      end

      File.join(Discourse.store.absolute_base_url, path)
    end
  end
end
|
||||
|
|
|
@ -0,0 +1,243 @@
|
|||
require 'rails_helper'
|
||||
|
||||
RSpec.describe InlineUploads do
|
||||
before do
|
||||
@original_asset_host = Rails.configuration.action_controller.asset_host
|
||||
Rails.configuration.action_controller.asset_host = "https://cdn.discourse.org/stuff"
|
||||
end
|
||||
|
||||
after do
|
||||
Rails.configuration.action_controller.asset_host = @original_asset_host
|
||||
end
|
||||
|
||||
describe '.process' do
|
||||
describe 'local uploads' do
|
||||
fab!(:upload) { Fabricate(:upload) }
|
||||
fab!(:upload2) { Fabricate(:upload) }
|
||||
fab!(:upload3) { Fabricate(:upload) }
|
||||
|
||||
it "should not correct existing inline uploads" do
|
||||
md = <<~MD
|
||||
![test](#{upload.short_url})haha
|
||||
[test]#{upload.short_url}
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(md)
|
||||
|
||||
md = <<~MD
|
||||
![test](#{upload.short_url})
|
||||
[test|attachment](#{upload.short_url})
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(md)
|
||||
end
|
||||
|
||||
it "should not escape existing content" do
|
||||
md = "1 > 2"
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(md)
|
||||
end
|
||||
|
||||
it "should not escape invalid HTML tags" do
|
||||
md = "<x>.<y>"
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(md)
|
||||
end
|
||||
|
||||
it "should not correct code blocks" do
  # Inline code span: the tag sits between backticks.
  md = "`<a class=\"attachment\" href=\"#{upload2.url}\">In Code Block</a>`"

  expect(InlineUploads.process(md)).to eq(md)

  # Indented code block: Markdown only treats a line as code when it is
  # indented by at least four spaces.
  md = "    <a class=\"attachment\" href=\"#{upload2.url}\">In Code Block</a>"

  expect(InlineUploads.process(md)).to eq(md)
end
|
||||
|
||||
it "should correct bbcode img URLs to the short version" do
|
||||
md = <<~MD
|
||||
[img]#{upload.url}[/img]
|
||||
|
||||
[img]
|
||||
#{upload2.url}
|
||||
[/img]
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(<<~MD)
|
||||
![](#{upload.short_url})
|
||||
|
||||
![](#{upload2.short_url})
|
||||
MD
|
||||
end
|
||||
|
||||
it "should correct image URLs to the short version" do
|
||||
md = <<~MD
|
||||
![image|690x290](#{upload.short_url})
|
||||
|
||||
![image](#{upload.url})
|
||||
![image|100x100](#{upload.url})
|
||||
|
||||
<img src="#{Discourse.base_url}#{upload.url}" alt="some image">
|
||||
<img src="#{Discourse.base_url}#{upload.url}" alt="some image"><img src="#{Discourse.base_url}#{upload.url}" alt="some image">
|
||||
|
||||
<img src="#{upload.url}" width="5" height="4">
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(<<~MD)
|
||||
![image|690x290](#{upload.short_url})
|
||||
|
||||
![image](#{upload.short_url})
|
||||
![image|100x100](#{upload.short_url})
|
||||
|
||||
![some image](#{upload.short_url})
|
||||
![some image](#{upload.short_url})![some image](#{upload.short_url})
|
||||
|
||||
![|5x4](#{upload.short_url})
|
||||
MD
|
||||
end
|
||||
|
||||
it "should correct attachment URLS with an upload before" do
|
||||
md = <<~MD
|
||||
![image](#{upload.short_url})
|
||||
|
||||
<a class="attachment" href="#{upload2.url}">test2</a>
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(<<~MD)
|
||||
![image](#{upload.short_url})
|
||||
|
||||
[test2|attachment](#{upload2.short_url})
|
||||
MD
|
||||
end
|
||||
|
||||
it "should correct attachment URLs to the short version" do
|
||||
md = <<~MD
|
||||
<a class="attachment" href="#{upload.url}">
|
||||
this
|
||||
is
|
||||
some
|
||||
attachment
|
||||
|
||||
</a>
|
||||
|
||||
- <a class="attachment" href="#{upload2.url}">test2</a>
|
||||
- <a class="attachment" href="#{upload2.url}">test2</a>
|
||||
- <a class="attachment" href="#{upload2.url}">test2</a>
|
||||
|
||||
<a class="test attachment" href="#{upload3.url}">test3</a>
|
||||
<a class="test attachment" href="#{upload3.url}">test3</a><a class="test attachment" href="#{upload3.url}">test3</a>
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(<<~MD)
|
||||
[this is some attachment|attachment](#{upload.short_url})
|
||||
|
||||
- [test2|attachment](#{upload2.short_url})
|
||||
- [test2|attachment](#{upload2.short_url})
|
||||
- [test2|attachment](#{upload2.short_url})
|
||||
|
||||
[test3|attachment](#{upload3.short_url})
|
||||
[test3|attachment](#{upload3.short_url})[test3|attachment](#{upload3.short_url})
|
||||
MD
|
||||
end
|
||||
|
||||
it 'should correct full upload url to the shorter version' do
|
||||
md = <<~MD
|
||||
Some random text
|
||||
|
||||
![test](#{upload.short_url})
|
||||
[test|attachment](#{upload.short_url})
|
||||
|
||||
<a class="test attachment" href="#{upload.url}">
|
||||
test
|
||||
</a>
|
||||
|
||||
`<a class="attachment" href="#{upload2.url}">In Code Block</a>`
|
||||
|
||||
<a class="attachment" href="#{upload3.url}">In Code Block</a>
|
||||
|
||||
<a href="#{upload.url}">newtest</a>
|
||||
<a href="#{Discourse.base_url_no_prefix}#{upload.url}">newtest</a>
|
||||
|
||||
<a href="https://somerandomesite.com#{upload.url}">test</a>
|
||||
<a class="attachment" href="https://somerandom.com/url">test</a>
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(<<~MD)
|
||||
Some random text
|
||||
|
||||
![test](#{upload.short_url})
|
||||
[test|attachment](#{upload.short_url})
|
||||
|
||||
[test|attachment](#{upload.short_url})
|
||||
|
||||
`<a class="attachment" href="#{upload2.url}">In Code Block</a>`
|
||||
|
||||
<a class="attachment" href="#{upload3.url}">In Code Block</a>
|
||||
|
||||
[newtest](#{upload.short_url})
|
||||
[newtest](#{upload.short_url})
|
||||
|
||||
<a href="https://somerandomesite.com#{upload.url}">test</a>
|
||||
<a class="attachment" href="https://somerandom.com/url">test</a>
|
||||
MD
|
||||
end
|
||||
|
||||
it 'accepts a block that yields when link does not match an upload in the db' do
|
||||
url = "#{Discourse.base_url}#{upload.url}"
|
||||
|
||||
md = <<~MD
|
||||
<img src="#{url}" alt="some image">
|
||||
<img src="#{upload2.url}" alt="some image">
|
||||
MD
|
||||
|
||||
upload.destroy!
|
||||
|
||||
InlineUploads.process(md, on_missing: lambda { |link|
|
||||
expect(link).to eq(url)
|
||||
})
|
||||
end
|
||||
end
|
||||
|
||||
describe "s3 uploads" do
|
||||
let(:upload) { Fabricate(:upload_s3) }
|
||||
|
||||
before do
|
||||
SiteSetting.enable_s3_uploads = true
|
||||
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
|
||||
SiteSetting.s3_access_key_id = "some key"
|
||||
SiteSetting.s3_secret_access_key = "some secret key"
|
||||
SiteSetting.s3_cdn_url = "https://s3.cdn.com"
|
||||
end
|
||||
|
||||
it "should correct image URLs to the short version" do
|
||||
md = <<~MD
|
||||
<img src="#{upload.url}" alt="some image">
|
||||
<img src="#{URI.join(SiteSetting.s3_cdn_url, URI.parse(upload.url).path).to_s}" alt="some image">
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(<<~MD)
|
||||
![some image](#{upload.short_url})
|
||||
![some image](#{upload.short_url})
|
||||
MD
|
||||
end
|
||||
|
||||
it "should correct image URLs in multisite" do
|
||||
begin
|
||||
Rails.configuration.multisite = true
|
||||
|
||||
md = <<~MD
|
||||
<img src="#{upload.url}" alt="some image">
|
||||
<img src="#{URI.join(SiteSetting.s3_cdn_url, URI.parse(upload.url).path).to_s}" alt="some image">
|
||||
MD
|
||||
|
||||
expect(InlineUploads.process(md)).to eq(<<~MD)
|
||||
![some image](#{upload.short_url})
|
||||
![some image](#{upload.short_url})
|
||||
MD
|
||||
ensure
|
||||
Rails.configuration.multisite = false
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue