FIX: Use previous chunk to check if local backup chunk upload complete (#14896)

Uppy and Resumable slice up their chunks differently, which causes a difference
in this algorithm. Let's take a 131.6MB file (137951695 bytes) with a 5MB (5242880 bytes)
chunk size. For resumable, there are 26 chunks, and uppy there are 27. This is
controlled by forceChunkSize in resumable which is false by default. The final
chunk size is 6879695 (chunk size + remainder) whereas in uppy it is 1636815 (just remainder).

This means that the current condition of uploaded_file_size + current_chunk_size >= total_size
is hit twice by uppy, because it uses a more correct number of chunks. This
can be solved for both uppy and resumable by checking the _previous_ chunk
number * chunk_size as the uploaded_file_size.

An example of what is happening before that change, using the current
chunk number to calculate uploaded_file_size.

chunk 26: resumable: uploaded_file_size (26 * 5242880) + current_chunk_size (6879695) = 143194575 >= total_size (137951695) ? YES
chunk 26: uppy: uploaded_file_size (26 * 5242880) + current_chunk_size (5242880) = 141557760 >= total_size (137951695) ? YES
chunk 27: uppy: uploaded_file_size (27 * 5242880) + current_chunk_size (1636815) = 143194575 >= total_size (137951695) ? YES

An example of what this looks like after the change, using the previous
chunk number to calculate uploaded_file_size:

chunk 26: resumable: uploaded_file_size (25 * 5242880) + current_chunk_size (6879695) = 137951695 >= total_size (137951695) ? YES
chunk 26: uppy: uploaded_file_size (25 * 5242880) + current_chunk_size (5242880) = 136314880 >= total_size (137951695) ? NO
chunk 27: uppy: uploaded_file_size (26 * 5242880) + current_chunk_size (1636815) = 137951695 >= total_size (137951695) ? YES
This commit is contained in:
Martin Brennan 2021-11-15 15:08:21 +10:00 committed by GitHub
parent 9c9ad22626
commit 08e625c446
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 94 additions and 2 deletions

View File

@ -176,13 +176,14 @@ class Admin::BackupsController < Admin::AdminController
chunk_number = params.fetch(:resumableChunkNumber).to_i chunk_number = params.fetch(:resumableChunkNumber).to_i
chunk_size = params.fetch(:resumableChunkSize).to_i chunk_size = params.fetch(:resumableChunkSize).to_i
current_chunk_size = params.fetch(:resumableCurrentChunkSize).to_i current_chunk_size = params.fetch(:resumableCurrentChunkSize).to_i
previous_chunk_number = chunk_number - 1
# path to chunk file # path to chunk file
chunk = BackupRestore::LocalBackupStore.chunk_path(identifier, filename, chunk_number) chunk = BackupRestore::LocalBackupStore.chunk_path(identifier, filename, chunk_number)
# upload chunk # upload chunk
HandleChunkUpload.upload_chunk(chunk, file: file) HandleChunkUpload.upload_chunk(chunk, file: file)
uploaded_file_size = chunk_number * chunk_size uploaded_file_size = previous_chunk_number * chunk_size
# when all chunks are uploaded # when all chunks are uploaded
if uploaded_file_size + current_chunk_size >= total_size if uploaded_file_size + current_chunk_size >= total_size
# merge all the chunks in a background thread # merge all the chunks in a background thread

View File

@ -234,10 +234,10 @@ RSpec.describe Admin::BackupsController do
describe "when filename is valid" do describe "when filename is valid" do
it "should upload the file successfully" do it "should upload the file successfully" do
freeze_time
described_class.any_instance.expects(:has_enough_space_on_disk?).returns(true) described_class.any_instance.expects(:has_enough_space_on_disk?).returns(true)
filename = 'test_Site-0123456789.tar.gz' filename = 'test_Site-0123456789.tar.gz'
@paths = [backup_path(File.join('tmp', 'test', "#{filename}.part1"))]
post "/admin/backups/upload.json", params: { post "/admin/backups/upload.json", params: {
resumableFilename: filename, resumableFilename: filename,
@ -248,11 +248,102 @@ RSpec.describe Admin::BackupsController do
resumableCurrentChunkSize: '1', resumableCurrentChunkSize: '1',
file: fixture_file_upload(Tempfile.new) file: fixture_file_upload(Tempfile.new)
} }
expect_job_enqueued(job: :backup_chunks_merger, args: {
filename: filename, identifier: 'test', chunks: 1
}, at: 5.seconds.from_now)
expect(response.status).to eq(200) expect(response.status).to eq(200)
expect(response.body).to eq("") expect(response.body).to eq("")
end end
end end
describe "completing an upload by enqueuing backup_chunks_merger" do
let(:filename) { 'test_Site-0123456789.tar.gz' }
it "works with a single chunk" do
freeze_time
described_class.any_instance.expects(:has_enough_space_on_disk?).returns(true)
# 2MB file, 2MB chunks = 1x 2MB chunk
post "/admin/backups/upload.json", params: {
resumableFilename: filename,
resumableTotalSize: '2097152',
resumableIdentifier: 'test',
resumableChunkNumber: '1',
resumableChunkSize: '2097152',
resumableCurrentChunkSize: '2097152',
file: fixture_file_upload(Tempfile.new)
}
expect_job_enqueued(job: :backup_chunks_merger, args: {
filename: filename, identifier: 'test', chunks: 1
}, at: 5.seconds.from_now)
end
it "works with multiple chunks when the final chunk is chunk_size + remainder" do
freeze_time
described_class.any_instance.expects(:has_enough_space_on_disk?).twice.returns(true)
# 5MB file, 2MB chunks = 1x 2MB chunk + 1x 3MB chunk with resumable.js
post "/admin/backups/upload.json", params: {
resumableFilename: filename,
resumableTotalSize: '5242880',
resumableIdentifier: 'test',
resumableChunkNumber: '1',
resumableChunkSize: '2097152',
resumableCurrentChunkSize: '2097152',
file: fixture_file_upload(Tempfile.new)
}
post "/admin/backups/upload.json", params: {
resumableFilename: filename,
resumableTotalSize: '5242880',
resumableIdentifier: 'test',
resumableChunkNumber: '2',
resumableChunkSize: '2097152',
resumableCurrentChunkSize: '3145728',
file: fixture_file_upload(Tempfile.new)
}
expect_job_enqueued(job: :backup_chunks_merger, args: {
filename: filename, identifier: 'test', chunks: 2
}, at: 5.seconds.from_now)
end
it "works with multiple chunks when the final chunk is just the remaninder" do
freeze_time
described_class.any_instance.expects(:has_enough_space_on_disk?).times(3).returns(true)
# 5MB file, 2MB chunks = 2x 2MB chunk + 1x 1MB chunk with uppy.js
post "/admin/backups/upload.json", params: {
resumableFilename: filename,
resumableTotalSize: '5242880',
resumableIdentifier: 'test',
resumableChunkNumber: '1',
resumableChunkSize: '2097152',
resumableCurrentChunkSize: '2097152',
file: fixture_file_upload(Tempfile.new)
}
post "/admin/backups/upload.json", params: {
resumableFilename: filename,
resumableTotalSize: '5242880',
resumableIdentifier: 'test',
resumableChunkNumber: '2',
resumableChunkSize: '2097152',
resumableCurrentChunkSize: '2097152',
file: fixture_file_upload(Tempfile.new)
}
post "/admin/backups/upload.json", params: {
resumableFilename: filename,
resumableTotalSize: '5242880',
resumableIdentifier: 'test',
resumableChunkNumber: '3',
resumableChunkSize: '2097152',
resumableCurrentChunkSize: '1048576',
file: fixture_file_upload(Tempfile.new)
}
expect_job_enqueued(job: :backup_chunks_merger, args: {
filename: filename, identifier: 'test', chunks: 3
}, at: 5.seconds.from_now)
end
end
end end
describe "#check_backup_chunk" do describe "#check_backup_chunk" do