From 157f10db4c98753e4999d184b6171738d9109210 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Thu, 27 May 2021 17:42:25 +0200 Subject: [PATCH] FEATURE: Use path from existing URL of uploads and optimized images (#13177) Discourse shouldn't dynamically calculate the path of uploads and optimized images after a file has been stored on disk or S3. Otherwise it might calculate the wrong path if the SHA1 or extension stored in the database doesn't match the actual file path. --- lib/file_store/base_store.rb | 19 +++ lib/file_store/local_store.rb | 5 +- lib/file_store/s3_store.rb | 2 + lib/shrink_uploaded_image.rb | 2 +- spec/components/email/sender_spec.rb | 4 +- spec/components/file_store/base_store_spec.rb | 136 +++++++++++++++--- .../components/file_store/local_store_spec.rb | 20 +++ spec/components/file_store/s3_store_spec.rb | 14 +- spec/fabricators/upload_fabricator.rb | 2 +- spec/models/upload_spec.rb | 32 +++-- spec/multisite/s3_store_spec.rb | 9 +- 11 files changed, 189 insertions(+), 56 deletions(-) diff --git a/lib/file_store/base_store.rb b/lib/file_store/base_store.rb index 44749f586ab..80f1fb6680c 100644 --- a/lib/file_store/base_store.rb +++ b/lib/file_store/base_store.rb @@ -3,13 +3,17 @@ module FileStore class BaseStore + UPLOAD_PATH_REGEX = %r|/(original/\d+X/.*)| + OPTIMIZED_IMAGE_PATH_REGEX = %r|/(optimized/\d+X/.*)| def store_upload(file, upload, content_type = nil) + upload.url = nil path = get_path_for_upload(upload) store_file(file, path) end def store_optimized_image(file, optimized_image, content_type = nil, secure: false) + optimized_image.url = nil path = get_path_for_optimized_image(optimized_image) store_file(file, path) end @@ -116,6 +120,12 @@ module FileStore end def get_path_for_upload(upload) + # try to extract the path from the URL instead of calculating it, + # because the calculated path might differ from the actual path + if upload.url.present? && (path = upload.url[UPLOAD_PATH_REGEX, 1]) + return prefix_path(path) + end + extension = if upload.extension ".#{upload.extension}" @@ -128,6 +138,12 @@ module FileStore end def get_path_for_optimized_image(optimized_image) + # try to extract the path from the URL instead of calculating it, + # because the calculated path might differ from the actual path + if optimized_image.url.present? && (path = optimized_image.url[OPTIMIZED_IMAGE_PATH_REGEX, 1]) + return prefix_path(path) + end + upload = optimized_image.upload version = optimized_image.version || 1 extension = "_#{version}_#{optimized_image.width}x#{optimized_image.height}#{optimized_image.extension}" @@ -178,6 +194,9 @@ module FileStore depths.max end + def prefix_path(path) + path + end end end diff --git a/lib/file_store/local_store.rb b/lib/file_store/local_store.rb index 25649532c04..e945ac1c32b 100644 --- a/lib/file_store/local_store.rb +++ b/lib/file_store/local_store.rb @@ -66,7 +66,7 @@ module FileStore end def get_path_for(type, upload_id, sha, extension) - File.join("/", upload_path, super(type, upload_id, sha, extension)) + prefix_path(super(type, upload_id, sha, extension)) end def copy_file(file, path) @@ -134,5 +134,8 @@ module FileStore puts "#{count} of #{model.count} #{model.name.underscore.pluralize} are missing" if count > 0 end + def prefix_path(path) + File.join("/", upload_path, path) + end end end diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb index ab6e56bb69f..c0671ca9f40 100644 --- a/lib/file_store/s3_store.rb +++ b/lib/file_store/s3_store.rb @@ -22,6 +22,7 @@ module FileStore end def store_upload(file, upload, content_type = nil) + upload.url = nil path = get_path_for_upload(upload) url, upload.etag = store_file( file, @@ -35,6 +36,7 @@ module FileStore end def store_optimized_image(file, optimized_image, content_type = nil, secure: false) + optimized_image.url = nil path = get_path_for_optimized_image(optimized_image) url, optimized_image.etag = store_file(file, path, content_type: content_type, private_acl: secure) url diff --git a/lib/shrink_uploaded_image.rb b/lib/shrink_uploaded_image.rb index 43416ade0d6..fbcdfd628bc 100644 --- a/lib/shrink_uploaded_image.rb +++ b/lib/shrink_uploaded_image.rb @@ -67,7 +67,7 @@ class ShrinkUploadedImage log "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}" log "sha: #{original_upload.sha1} -> #{sha1}" - log "(an exisiting upload)" if existing_upload + log "(an existing upload)" if existing_upload success = true posts = Post.unscoped.joins(:post_uploads).where(post_uploads: { upload_id: original_upload.id }).uniq.sort_by(&:created_at) diff --git a/spec/components/email/sender_spec.rb b/spec/components/email/sender_spec.rb index 3f80e23b2a2..bacf426dbd9 100644 --- a/spec/components/email/sender_spec.rb +++ b/spec/components/email/sender_spec.rb @@ -489,8 +489,8 @@ describe Email::Sender do let!(:optimized_image_file) { file_from_fixtures("smallest.png", "images") } before do - Discourse.store.store_optimized_image(optimized_image_file, optimized) - optimized.update(url: Discourse.store.absolute_base_url + '/' + optimized.url) + url = Discourse.store.store_optimized_image(optimized_image_file, optimized) + optimized.update(url: Discourse.store.absolute_base_url + '/' + url) Discourse.store.cache_file(optimized_image_file, File.basename("#{optimized.sha1}.png")) end diff --git a/spec/components/file_store/base_store_spec.rb b/spec/components/file_store/base_store_spec.rb index 4771b28f7bb..05befda29a4 100644 --- a/spec/components/file_store/base_store_spec.rb +++ b/spec/components/file_store/base_store_spec.rb @@ -6,42 +6,136 @@ RSpec.describe FileStore::BaseStore do fab!(:upload) { Fabricate(:upload, id: 9999, sha1: Digest::SHA1.hexdigest('9999')) } describe '#get_path_for_upload' do - it 'should return the right path' do - expect(FileStore::BaseStore.new.get_path_for_upload(upload)) - .to eq('original/2X/4/4170ac2a2782a1516fe9e13d7322ae482c1bd594.png') + def expect_correct_path(expected_path) + expect(described_class.new.get_path_for_upload(upload)).to eq(expected_path) end - describe 'when Upload#extension has not been set' do - it 'should return the right path' do - upload.update!(extension: nil) + context "empty URL" do + before do + upload.update!(url: "") + end - expect(FileStore::BaseStore.new.get_path_for_upload(upload)) - .to eq('original/2X/4/4170ac2a2782a1516fe9e13d7322ae482c1bd594.png') + it 'should return the right path' do + expect_correct_path('original/2X/4/4170ac2a2782a1516fe9e13d7322ae482c1bd594.png') + end + + describe 'when Upload#extension has not been set' do + it 'should return the right path' do + upload.update!(extension: nil) + expect_correct_path('original/2X/4/4170ac2a2782a1516fe9e13d7322ae482c1bd594.png') + end + end + + describe 'when id is negative' do + it 'should return the right depth' do + upload.update!(id: -999) + expect_correct_path('original/1X/4170ac2a2782a1516fe9e13d7322ae482c1bd594.png') + end end end - describe 'when id is negative' do - it 'should return the right depth' do - upload.update!(id: -999) + context "existing URL" do + context "regular site" do + it "returns the correct path for files stored on local storage" do + upload.update!(url: "/uploads/default/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") - expect(FileStore::BaseStore.new.get_path_for_upload(upload)) - .to eq('original/1X/4170ac2a2782a1516fe9e13d7322ae482c1bd594.png') + upload.update!(url: "/uploads/default/original/3X/63/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/3X/63/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + end + + it "returns the correct path for files stored on S3" do + upload.update!(url: "//bucket-name.s3.dualstack.us-west-2.amazonaws.com/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + + upload.update!(url: "//bucket-name.s3.dualstack.us-west-2.amazonaws.com/original/3X/63/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/3X/63/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + end + end + + context "multisite" do + it "returns the correct path for files stored on local storage" do + upload.update!(url: "/uploads/foo/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + + upload.update!(url: "/uploads/foo/original/3X/63/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/3X/63/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + end + + it "returns the correct path for files stored on S3" do + upload.update!(url: "//bucket-name.s3.dualstack.us-west-2.amazonaws.com/uploads/foo/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + + upload.update!(url: "//bucket-name.s3.dualstack.us-west-2.amazonaws.com/uploads/foo/original/3X/63/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/3X/63/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + end + + it "returns the correct path when the site name is 'original'" do + upload.update!(url: "/uploads/original/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + + upload.update!(url: "//bucket-name.s3.dualstack.us-west-2.amazonaws.com/uploads/original/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + expect_correct_path("original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg") + end end end end describe '#get_path_for_optimized_image' do - let(:upload) { Fabricate.build(:upload, id: 100) } - let(:optimized_path) { "optimized/1X/#{upload.sha1}_1_100x200.png" } + let!(:upload) { Fabricate.build(:upload, id: 100) } + let!(:optimized_path) { "optimized/1X/#{upload.sha1}_1_100x200.png" } - it 'should return the right path' do - optimized = Fabricate.build(:optimized_image, upload: upload, version: 1) - expect(FileStore::BaseStore.new.get_path_for_optimized_image(optimized)).to eq(optimized_path) + context "empty URL" do + it 'should return the right path' do + optimized = Fabricate.build(:optimized_image, upload: upload, version: 1) + expect(FileStore::BaseStore.new.get_path_for_optimized_image(optimized)).to eq(optimized_path) + end + + it 'should return the right path for `nil` version' do + optimized = Fabricate.build(:optimized_image, upload: upload, version: nil) + expect(FileStore::BaseStore.new.get_path_for_optimized_image(optimized)).to eq(optimized_path) + end end - it 'should return the right path for `nil` version' do - optimized = Fabricate.build(:optimized_image, upload: upload, version: nil) - expect(FileStore::BaseStore.new.get_path_for_optimized_image(optimized)).to eq(optimized_path) + context "existing URL" do + let!(:optimized) { Fabricate.build(:optimized_image, upload: upload, version: 1) } + let!(:optimized_path) { "optimized/1X/#{upload.sha1}_1_100x200.jpg" } + + def expect_correct_optimized_path + expect(described_class.new.get_path_for_optimized_image(optimized)).to eq(optimized_path) + end + + context "regular site" do + it "returns the correct path for files stored on local storage" do + optimized.update!(url: "/uploads/default/optimized/1X/#{upload.sha1}_1_100x200.jpg") + expect_correct_optimized_path + end + + it "returns the correct path for files stored on S3" do + optimized.update!(url: "//bucket-name.s3.dualstack.us-west-2.amazonaws.com/optimized/1X/#{upload.sha1}_1_100x200.jpg") + expect_correct_optimized_path + end + end + + context "multisite" do + it "returns the correct path for files stored on local storage" do + optimized.update!(url: "/uploads/foo/optimized/1X/#{upload.sha1}_1_100x200.jpg") + expect_correct_optimized_path + end + + it "returns the correct path for files stored on S3" do + optimized.update!(url: "//bucket-name.s3.dualstack.us-west-2.amazonaws.com/uploads/foo/optimized/1X/#{upload.sha1}_1_100x200.jpg") + expect_correct_optimized_path + end + + it "returns the correct path when the site name is 'optimized'" do + optimized.update!(url: "/uploads/optimized/optimized/1X/#{upload.sha1}_1_100x200.jpg") + expect_correct_optimized_path + + optimized.update!(url: "//bucket-name.s3.dualstack.us-west-2.amazonaws.com/uploads/optimized/optimized/1X/#{upload.sha1}_1_100x200.jpg") + expect_correct_optimized_path + end + end end end diff --git a/spec/components/file_store/local_store_spec.rb b/spec/components/file_store/local_store_spec.rb index f601dec73dc..60ac2a59579 100644 --- a/spec/components/file_store/local_store_spec.rb +++ b/spec/components/file_store/local_store_spec.rb @@ -146,4 +146,24 @@ describe FileStore::LocalStore do expect(store.external?).to eq(false) end + describe "#get_path_for" do + it "returns the correct path" do + expect(store.get_path_for("original", upload.id, upload.sha1, ".#{upload.extension}")) + .to match(%r|/#{upload_path}/original/.+#{upload.sha1}\.png|) + end + end + + describe "#get_path_for_upload" do + it "returns the correct path" do + expect(store.get_path_for_upload(upload)) + .to match(%r|/#{upload_path}/original/.+#{upload.sha1}\.png|) + end + end + + describe "#get_path_for_optimized_image" do + it "returns the correct path" do + expect(store.get_path_for_optimized_image(optimized_image)) + .to match(%r|/#{upload_path}/optimized/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png|) + end + end end diff --git a/spec/components/file_store/s3_store_spec.rb b/spec/components/file_store/s3_store_spec.rb index 3b617cf523f..5ac16351ef7 100644 --- a/spec/components/file_store/s3_store_spec.rb +++ b/spec/components/file_store/s3_store_spec.rb @@ -29,7 +29,6 @@ describe FileStore::S3Store do describe "#store_upload" do it "returns an absolute schemaless url" do - store.expects(:get_depth_for).with(upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once s3_bucket.expects(:object).with("original/1X/#{upload.sha1}.png").returns(s3_object) s3_object.expects(:put).with( @@ -51,7 +50,6 @@ describe FileStore::S3Store do end it "returns an absolute schemaless url" do - store.expects(:get_depth_for).with(upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket) s3_bucket.expects(:object).with("discourse-uploads/original/1X/#{upload.sha1}.png").returns(s3_object) @@ -67,7 +65,7 @@ describe FileStore::S3Store do it "saves secure attachment using private ACL" do SiteSetting.prevent_anons_from_downloading_files = true SiteSetting.authorized_extensions = "pdf|png|jpg|gif" - upload.update!(original_filename: "small.pdf", extension: "pdf", secure: true) + upload = Fabricate(:upload, original_filename: "small.pdf", extension: "pdf", secure: true) s3_helper.expects(:s3_bucket).returns(s3_bucket) s3_bucket.expects(:object).with("original/1X/#{upload.sha1}.pdf").returns(s3_object) @@ -109,7 +107,6 @@ describe FileStore::S3Store do end it "returns an absolute schemaless url" do - store.expects(:get_depth_for).with(optimized_image.upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket) path = "optimized/1X/#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200.png" @@ -127,7 +124,6 @@ describe FileStore::S3Store do end it "returns an absolute schemaless url" do - store.expects(:get_depth_for).with(optimized_image.upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket) path = "discourse-uploads/optimized/1X/#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200.png" @@ -170,7 +166,6 @@ describe FileStore::S3Store do context 'removal from s3' do describe "#remove_upload" do it "removes the file from s3 with the right paths" do - store.expects(:get_depth_for).with(upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once upload.update!(url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/original/1X/#{upload.sha1}.png") s3_object = stub @@ -188,7 +183,6 @@ describe FileStore::S3Store do upload = optimized.upload path = "optimized/1X/#{upload.sha1}_#{optimized.version}_#{optimized.width}x#{optimized.height}.png" - store.expects(:get_depth_for).with(upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once optimized.update!(url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{path}") s3_object = stub @@ -207,7 +201,6 @@ describe FileStore::S3Store do end it "removes the file from s3 with the right paths" do - store.expects(:get_depth_for).with(upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once upload.update!(url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/discourse-uploads/original/1X/#{upload.sha1}.png") s3_object = stub @@ -360,8 +353,9 @@ describe FileStore::S3Store do end describe ".update_upload_ACL" do + let(:upload) { Fabricate(:upload, original_filename: "small.pdf", extension: "pdf") } + it "sets acl to public by default" do - upload.update!(original_filename: "small.pdf", extension: "pdf") s3_helper.expects(:s3_bucket).returns(s3_bucket) s3_bucket.expects(:object).with("original/1X/#{upload.sha1}.pdf").returns(s3_object) s3_object.expects(:acl).returns(s3_object) @@ -371,7 +365,7 @@ describe FileStore::S3Store do end it "sets acl to private when upload is marked secure" do - upload.update!(original_filename: "small.pdf", extension: "pdf", secure: true) + upload.update!(secure: true) s3_helper.expects(:s3_bucket).returns(s3_bucket) s3_bucket.expects(:object).with("original/1X/#{upload.sha1}.pdf").returns(s3_object) s3_object.expects(:acl).returns(s3_object) diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb index b63eee26442..b7b8b90d669 100644 --- a/spec/fabricators/upload_fabricator.rb +++ b/spec/fabricators/upload_fabricator.rb @@ -78,7 +78,7 @@ Fabricator(:s3_image_upload, from: :upload_s3) do file = Tempfile.new(['fabricated', '.png']) `convert -size #{upload.width}x#{upload.height} xc:white "#{file.path}"` - Discourse.store.store_upload(file, upload) + upload.url = Discourse.store.store_upload(file, upload) upload.sha1 = Upload.generate_digest(file.path) WebMock diff --git a/spec/models/upload_spec.rb b/spec/models/upload_spec.rb index 6613a29cb0c..9071bac3fa1 100644 --- a/spec/models/upload_spec.rb +++ b/spec/models/upload_spec.rb @@ -363,25 +363,29 @@ describe Upload do expect(upload.secure).to eq(false) end - it 'marks a local attachment as secure if secure media enabled' do - SiteSetting.authorized_extensions = "pdf" - upload.update!(original_filename: "small.pdf", extension: "pdf", secure: false, access_control_post: Fabricate(:private_message_post)) - enable_secure_media + context "local attachment" do + before do + SiteSetting.authorized_extensions = "pdf" + end - expect { upload.update_secure_status } - .to change { upload.secure } + let(:upload) { Fabricate(:upload, original_filename: "small.pdf", extension: "pdf", secure: true) } - expect(upload.secure).to eq(true) - end + it 'marks a local attachment as secure if secure media enabled' do + upload.update!(secure: false, access_control_post: Fabricate(:private_message_post)) + enable_secure_media - it 'marks a local attachment as not secure if secure media enabled' do - SiteSetting.authorized_extensions = "pdf" - upload.update!(original_filename: "small.pdf", extension: "pdf", secure: true) + expect { upload.update_secure_status } + .to change { upload.secure } - expect { upload.update_secure_status } - .to change { upload.secure } + expect(upload.secure).to eq(true) + end - expect(upload.secure).to eq(false) + it 'marks a local attachment as not secure if secure media enabled' do + expect { upload.update_secure_status } + .to change { upload.secure } + + expect(upload.secure).to eq(false) + end end it 'does not change secure status of a non-attachment when prevent_anons_from_downloading_files is enabled by itself' do diff --git a/spec/multisite/s3_store_spec.rb b/spec/multisite/s3_store_spec.rb index cc90a40738d..27248b22657 100644 --- a/spec/multisite/s3_store_spec.rb +++ b/spec/multisite/s3_store_spec.rb @@ -108,7 +108,6 @@ RSpec.describe 'Multisite s3 uploads', type: :multisite do it "removes the file from s3 on multisite" do test_multisite_connection('default') do upload = build_upload - store.expects(:get_depth_for).with(upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once upload.update!(url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{upload_path}/original/1X/#{upload.sha1}.png") s3_object = stub @@ -125,7 +124,6 @@ RSpec.describe 'Multisite s3 uploads', type: :multisite do it "removes the file from s3 on another multisite db" do test_multisite_connection('second') do upload = build_upload - store.expects(:get_depth_for).with(upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once upload.update!(url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{upload_path}/original/1X/#{upload.sha1}.png") s3_object = stub @@ -147,7 +145,6 @@ RSpec.describe 'Multisite s3 uploads', type: :multisite do it "removes the file from s3 on multisite" do test_multisite_connection('default') do upload = build_upload - store.expects(:get_depth_for).with(upload.id).returns(0) s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once upload.update!(url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/discourse-uploads/#{upload_path}/original/1X/#{upload.sha1}.png") s3_object = stub @@ -185,14 +182,14 @@ RSpec.describe 'Multisite s3 uploads', type: :multisite do describe "when secure attachments are enabled" do it "returns signed URL with correct path" do test_multisite_connection('default') do - upload = build_upload - upload.update!(original_filename: "small.pdf", extension: "pdf", secure: true) + upload = Fabricate(:upload, original_filename: "small.pdf", extension: "pdf", secure: true) s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once s3_bucket.expects(:object).with("#{upload_path}/original/1X/#{upload.sha1}.pdf").returns(s3_object).at_least_once s3_object.expects(:presigned_url).with(:get, expires_in: S3Helper::DOWNLOAD_URL_EXPIRES_AFTER_SECONDS) - expect(store.store_upload(uploaded_file, upload)).to eq( + upload.url = store.store_upload(uploaded_file, upload) + expect(upload.url).to eq( "//some-really-cool-bucket.s3.dualstack.us-west-1.amazonaws.com/#{upload_path}/original/1X/#{upload.sha1}.pdf" )