2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2017-10-03 03:00:42 -04:00
|
|
|
require "aws-sdk-s3"
|
2014-09-24 16:52:09 -04:00
|
|
|
|
|
|
|
class S3Helper
|
2021-06-25 00:22:31 -04:00
|
|
|
FIFTEEN_MEGABYTES = 15 * 1024 * 1024
|
2014-09-24 16:52:09 -04:00
|
|
|
|
2016-08-17 04:16:00 -04:00
|
|
|
# Raised by check_missing_site_options when a required S3 credential site
# setting is blank. The message is the name of the missing option.
class SettingMissing < StandardError; end
|
|
|
|
|
2018-05-22 17:21:52 -04:00
|
|
|
attr_reader :s3_bucket_name, :s3_bucket_folder_path
|
2016-08-19 02:08:04 -04:00
|
|
|
|
2020-07-02 23:42:36 -04:00
|
|
|
##
|
|
|
|
# Controls the following:
|
|
|
|
#
|
|
|
|
# * cache time for secure-media URLs
|
|
|
|
# * expiry time for S3 presigned URLs, which include backup downloads and
|
|
|
|
# any upload that has a private ACL (e.g. secure uploads)
|
2022-05-25 19:53:01 -04:00
|
|
|
#
|
|
|
|
# SiteSetting.s3_presigned_get_url_expires_after_seconds
|
2021-07-27 18:42:25 -04:00
|
|
|
|
|
|
|
##
|
|
|
|
# Controls the following:
|
|
|
|
#
|
|
|
|
# * presigned put_object URLs for direct S3 uploads
|
|
|
|
UPLOAD_URL_EXPIRES_AFTER_SECONDS ||= 10.minutes.to_i
|
2019-06-05 23:27:24 -04:00
|
|
|
|
2017-10-06 01:20:01 -04:00
|
|
|
# Builds a helper bound to a single bucket.
#
# @param s3_bucket_name [String] bucket name, optionally followed by
#   "/folder/path"; split by S3Helper.get_bucket_and_folder_path
# @param tombstone_prefix [String] key prefix #remove copies deleted files
#   under; nested below the bucket folder path when there is one
# @param options [Hash] extra S3 client options merged over the defaults;
#   an optional :client entry is used as the client instead of building one
# @raise [Discourse::InvalidParameters] when s3_bucket_name is blank
def initialize(s3_bucket_name, tombstone_prefix = "", options = {})
  # Work on a copy so the caller's hash is not stripped of its :client entry.
  options = options.dup
  @s3_client = options.delete(:client)
  @s3_options = default_s3_options.merge(options)

  raise Discourse::InvalidParameters.new("s3_bucket_name") if s3_bucket_name.blank?
  @s3_bucket_name, @s3_bucket_folder_path =
    self.class.get_bucket_and_folder_path(s3_bucket_name)

  @tombstone_prefix =
    if @s3_bucket_folder_path
      File.join(@s3_bucket_folder_path, tombstone_prefix)
    else
      tombstone_prefix
    end
end
|
|
|
|
|
2021-11-07 18:16:38 -05:00
|
|
|
# Builds an S3Helper from either the database-backed SiteSetting or the
# GlobalSetting configuration.
#
# @param use_db_s3_config [Boolean] read settings from SiteSetting when
#   true, from GlobalSetting otherwise
# @param for_backup [Boolean] target the backup bucket instead of the
#   uploads bucket; the accelerate endpoint is never enabled for backups
# @param s3_client [Object, nil] passed through as options[:client] when present
# @return [S3Helper]
def self.build_from_config(use_db_s3_config: false, for_backup: false, s3_client: nil)
  setting_klass = use_db_s3_config ? SiteSetting : GlobalSetting

  options = S3Helper.s3_options(setting_klass)
  options[:client] = s3_client if s3_client.present?

  # The two setting sources name the acceleration flag differently.
  acceleration_enabled =
    if use_db_s3_config
      SiteSetting.enable_s3_transfer_acceleration
    else
      GlobalSetting.s3_enable_transfer_acceleration
    end
  options[:use_accelerate_endpoint] = !for_backup && acceleration_enabled

  bucket =
    if for_backup
      setting_klass.s3_backup_bucket
    elsif use_db_s3_config
      SiteSetting.s3_upload_bucket
    else
      GlobalSetting.s3_bucket
    end

  S3Helper.new(bucket.downcase, "", options)
end
|
|
|
|
|
2019-01-08 09:34:48 -05:00
|
|
|
# Splits a configured bucket setting into its bucket name and optional
# folder path. The whole value is downcased first.
#
#   "My-Bucket/Some/Folder" => ["my-bucket", "some/folder"]
#   "bucket"                => ["bucket"]
#   "bucket/"               => ["bucket"]
#
# @param s3_bucket_name [String]
# @return [Array<String>] bucket name followed by the folder path, if any
def self.get_bucket_and_folder_path(s3_bucket_name)
  bucket, folder_path = s3_bucket_name.downcase.split("/", 2)

  # A trailing slash yields an empty folder path. "" is truthy in Ruby, so
  # callers would treat it as a real folder and File.join it into keys with
  # a leading "/". Report it as "no folder" instead.
  folder_path = nil if folder_path && folder_path.empty?

  [bucket, folder_path].compact
end
|
|
|
|
|
2016-08-14 23:21:24 -04:00
|
|
|
# Uploads a file to the bucket.
#
# Files of at least FIFTEEN_MEGABYTES go through upload_file with that size
# as the multipart threshold; smaller files are sent with a single put.
#
# @param file [#path] file to upload; its size is read from file.path
# @param path [String] destination key, resolved with get_path_for_s3_upload
# @param options [Hash] extra options passed to the S3 call; not modified
# @return [Array(String, String)] the resolved key and the object's ETag
#   with the double quotes removed
def upload(file, path, options = {})
  path = get_path_for_s3_upload(path)
  obj = s3_bucket.object(path)

  # Copy so the keys added below never leak into the caller's hash.
  options = options.dup

  etag =
    if File.size(file.path) >= FIFTEEN_MEGABYTES
      options[:multipart_threshold] = FIFTEEN_MEGABYTES
      obj.upload_file(file, options)
      # Reload the object so the ETag of the finished upload is read.
      obj.load
      obj.etag
    else
      options[:body] = file
      obj.put(options).etag
    end

  [path, etag.delete('"')]
end
|
|
|
|
|
FEATURE: Direct S3 multipart uploads for backups (#14736)
This PR introduces a new `enable_experimental_backup_uploads` site setting (default false and hidden), which when enabled alongside `enable_direct_s3_uploads` will allow for direct S3 multipart uploads of backup .tar.gz files.
To make multipart external uploads work with both the S3BackupStore and the S3Store, I've had to move several methods out of S3Store and into S3Helper, including:
* presigned_url
* create_multipart
* abort_multipart
* complete_multipart
* presign_multipart_part
* list_multipart_parts
Then, S3Store and S3BackupStore either delegate directly to S3Helper or have their own special methods to call S3Helper for these methods. FileStore.temporary_upload_path has also removed its dependence on upload_path, and can now be used interchangeably between the stores. A similar change was made in the frontend as well, moving the multipart related JS code out of ComposerUppyUpload and into a mixin of its own, so it can also be used by UppyUploadMixin.
Some changes to ExternalUploadManager had to be made here as well. The backup direct uploads do not need an Upload record made for them in the database, so they can be moved to their final S3 resting place when completing the multipart upload.
This changeset is not perfect; it introduces some special cases in UploadController to handle backups that was previously in BackupController, because UploadController is where the multipart routes are located. A subsequent pull request will pull these routes into a module or some other sharing pattern, along with hooks, so the backup controller and the upload controller (and any future controllers that may need them) can include these routes in a nicer way.
2021-11-10 17:25:31 -05:00
|
|
|
# Returns the path component of +url+ without its leading slash.
#
# @param url [String]
# @return [String]
def path_from_url(url)
  url_path = URI.parse(url).path
  url_path.delete_prefix("/")
end
|
|
|
|
|
2016-08-15 04:06:29 -04:00
|
|
|
# Deletes a file from the bucket, optionally copying it under the tombstone
# prefix first. A missing key (NoSuchKey / NotFound) is ignored.
#
# @param s3_filename [String] key of the file; the argument is not mutated
# @param copy_to_tombstone [Boolean] copy the file aside before deleting;
#   only done when a tombstone prefix is configured
def remove(s3_filename, copy_to_tombstone = false)
  key = s3_filename.dup

  # copy the file in tombstone
  if copy_to_tombstone && @tombstone_prefix.present?
    copy(get_path_for_s3_upload(key), File.join(@tombstone_prefix, key))
  end

  # delete the file
  key.prepend(multisite_upload_path) if Rails.configuration.multisite
  delete_object(get_path_for_s3_upload(key))
rescue Aws::S3::Errors::NoSuchKey, Aws::S3::Errors::NotFound
  nil
end
|
|
|
|
|
|
|
|
# Deletes the object stored under +key+. The key is used as given.
# A missing key (NoSuchKey / NotFound) is ignored and yields nil.
def delete_object(key)
  target = s3_bucket.object(key)
  target.delete
rescue Aws::S3::Errors::NoSuchKey, Aws::S3::Errors::NotFound
  nil
end
|
|
|
|
|
2022-11-07 07:53:14 -05:00
|
|
|
# Deletes several objects with one quiet batch request.
#
# @param keys [Array<String>] keys to delete, used as given
# @return [Object, nil] the bucket's delete_objects result, or nil when
#   +keys+ is empty and no request is made
def delete_objects(keys)
  # S3's DeleteObjects needs at least one object in the request, so an
  # empty batch is skipped rather than sent.
  return if keys.empty?

  objects = keys.map { |key| { key: key } }
  s3_bucket.delete_objects({ delete: { objects: objects, quiet: true } })
end
|
|
|
|
|
2018-09-10 05:01:11 -04:00
|
|
|
# Copies an object within the bucket.
#
# @param source [String] key of the object to copy; on multisite it is
#   rewritten to include the multisite upload path unless it already
#   contains that path or the tombstone prefix
# @param destination [String] target key, resolved with get_path_for_s3_upload
# @param options [Hash] options passed to copy_from; not modified.
#   :apply_metadata_to_destination is consumed here and, when truthy,
#   replaced by metadata_directive: "REPLACE"
# @return [Array(String, String)] the resolved destination key and the
#   copy's ETag with the double quotes removed
def copy(source, destination, options: {})
  # Copy first so the caller's hash never picks up the keys added below.
  options = options.dup

  # Always strip the flag: it is not an S3 option, so it must not reach
  # copy_from even when it was passed as false/nil.
  if options.delete(:apply_metadata_to_destination)
    options[:metadata_directive] = "REPLACE"
  end

  destination = get_path_for_s3_upload(destination)

  source_object =
    if !Rails.configuration.multisite || source.include?(multisite_upload_path) ||
         source.include?(@tombstone_prefix)
      s3_bucket.object(source)
    elsif @s3_bucket_folder_path
      folder, filename = source.split("/", 2)
      s3_bucket.object(File.join(folder, multisite_upload_path, filename))
    else
      s3_bucket.object(File.join(multisite_upload_path, source))
    end

  source_size = source_object.size
  if source_size > FIFTEEN_MEGABYTES
    options[:multipart_copy] = true
    options[:content_length] = source_size
  end

  destination_object = s3_bucket.object(destination)

  # Note for small files that do not use multipart copy: Any options for metadata
  # (e.g. content_disposition, content_type) will not be applied unless the
  # metadata_directive = "REPLACE" option is passed in. If this is not passed in,
  # the source object's metadata will be used.
  # For larger files it copies the metadata from the source file and merges it
  # with values from the copy call.
  response = destination_object.copy_from(source_object, options)

  etag =
    if response.respond_to?(:copy_object_result)
      # small files, regular copy
      response.copy_object_result.etag
    else
      # larger files, multipart copy
      response.data.etag
    end

  [destination, etag.delete('"')]
end
|
|
|
|
|
2021-11-07 18:16:38 -05:00
|
|
|
# Several places in the application need certain CORS rules to exist
|
|
|
|
# inside an S3 bucket so requests to the bucket can be made
|
|
|
|
# directly from the browser. The s3:ensure_cors_rules rake task
|
|
|
|
# is used to ensure these rules exist for assets, S3 backups, and
|
|
|
|
# direct S3 uploads, depending on configuration.
|
2018-10-14 21:43:31 -04:00
|
|
|
# Adds any of the given CORS rules the bucket does not have yet, keeping
# the rules that already exist.
#
# @param rules [Hash, Array<Hash>, nil] one rule or a list of rules
# @return [nil] when SiteSetting.s3_install_cors_rule is off
# @return [false] when there is nothing new to install, or when S3 answers
#   AccessDenied (logged at info level)
# @return [true] when the merged rule set was written
def ensure_cors!(rules = nil)
  return unless SiteSetting.s3_install_cors_rule

  # Wrap a single rule by hand: Array(rule_hash) would split the hash into
  # key/value pairs. compact drops the nil default so that no nil "rule"
  # is ever sent to S3.
  rules = [rules].compact unless rules.is_a?(Array)
  existing_rules = fetch_bucket_cors_rules

  new_rules = rules - existing_rules
  return false if new_rules.empty?

  final_rules = existing_rules + new_rules

  begin
    s3_resource.client.put_bucket_cors(
      bucket: @s3_bucket_name,
      cors_configuration: {
        cors_rules: final_rules,
      },
    )
  rescue Aws::S3::Errors::AccessDenied
    Rails.logger.info(
      "Could not PutBucketCors rules for #{@s3_bucket_name}, rules: #{final_rules}",
    )
    return false
  end

  true
end
|
|
|
|
|
|
|
|
# Creates or replaces one expiration rule in the bucket's lifecycle
# configuration, keeping the other existing rules.
#
# @param id [String] rule id; an existing rule whose id matches once "_"
#   and "-" are treated alike is replaced
# @param days [Integer] value stored as the rule's expiration days
# @param prefix [String, nil] filter by key prefix; wins over +tag+
# @param tag [Hash, nil] filter by tag, used only when no prefix is given
def update_lifecycle(id, days, prefix: nil, tag: nil)
  filter =
    if prefix
      { prefix: prefix }
    elsif tag
      { tag: tag }
    else
      {}
    end

  # cf. http://docs.aws.amazon.com/AmazonS3/latest/dev/object-lifecycle-mgmt.html
  rule = { id: id, status: "Enabled", expiration: { days: days }, filter: filter }

  rules =
    begin
      s3_resource.client.get_bucket_lifecycle_configuration(bucket: @s3_bucket_name).rules
    rescue Aws::S3::Errors::NoSuchLifecycleConfiguration
      # nothing to merge with
      []
    end

  # in the past we had a rule that was called purge-tombstone vs purge_tombstone
  # just go ahead and normalize for our bucket
  rules.delete_if { |existing| existing.id.gsub("_", "-") == id.gsub("_", "-") }
  rules << rule

  # normalize filter in rules, due to AWS library bug
  normalized_rules =
    rules.map do |entry|
      entry_hash = entry.to_h
      legacy_prefix = entry_hash.delete(:prefix)
      entry_hash[:filter] = { prefix: legacy_prefix } if legacy_prefix
      entry_hash
    end

  s3_resource.client.put_bucket_lifecycle_configuration(
    bucket: @s3_bucket_name,
    lifecycle_configuration: {
      rules: normalized_rules,
    },
  )
end
|
|
|
|
|
|
|
|
# Installs the "purge_tombstone" lifecycle rule for the tombstone prefix.
# Does nothing when SiteSetting.s3_configure_tombstone_policy is off or no
# tombstone prefix is set.
#
# @param grace_period [Integer] passed to update_lifecycle as the number of days
def update_tombstone_lifecycle(grace_period)
  policy_enabled = SiteSetting.s3_configure_tombstone_policy
  return if !policy_enabled || @tombstone_prefix.blank?

  update_lifecycle("purge_tombstone", grace_period, prefix: @tombstone_prefix)
end
|
|
|
|
|
2018-11-26 14:24:51 -05:00
|
|
|
# Lists the bucket's objects under a prefix.
#
# @param prefix [String] key prefix, resolved with get_path_for_s3_upload
# @param marker [String, nil] passed as the :marker option when present
# @return the result of the bucket's objects call
def list(prefix = "", marker = nil)
  listing_options = { prefix: get_path_for_s3_upload(prefix) }
  listing_options[:marker] = marker if marker.present?

  s3_bucket.objects(listing_options)
end
|
|
|
|
|
|
|
|
# Replaces the tag set of the object stored under +key+.
#
# @param key [String] object key, used as given
# @param tags [Hash] tag names and values; both are converted with to_s
def tag_file(key, tags)
  tag_set = tags.map { |name, value| { key: name.to_s, value: value.to_s } }

  s3_resource.client.put_object_tagging(
    bucket: @s3_bucket_name,
    key: key,
    tagging: {
      tag_set: tag_set,
    },
  )
end
|
|
|
|
|
2018-10-14 21:43:31 -04:00
|
|
|
# Returns the bucket object for +path+, after resolving the path with
# get_path_for_s3_upload.
def object(path)
  resolved_key = get_path_for_s3_upload(path)
  s3_bucket.object(resolved_key)
end
|
|
|
|
|
2017-10-06 01:20:01 -04:00
|
|
|
# Builds the S3 client options hash.
#
# Region and credentials are read from +obj+. The endpoint and the HTTP
# continue timeout are always read from SiteSetting.
#
# @param obj [#s3_region, #s3_use_iam_profile, #s3_access_key_id, #s3_secret_access_key]
#   settings source, e.g. SiteSetting or GlobalSetting
# @return [Hash] :region, :http_continue_timeout, :endpoint when one is
#   configured, and the access key pair unless an IAM profile is used
def self.s3_options(obj)
  opts = { region: obj.s3_region }

  endpoint = SiteSetting.s3_endpoint
  opts[:endpoint] = endpoint if endpoint.present?
  opts[:http_continue_timeout] = SiteSetting.s3_http_continue_timeout

  credentials =
    if obj.s3_use_iam_profile
      {}
    else
      { access_key_id: obj.s3_access_key_id, secret_access_key: obj.s3_secret_access_key }
    end

  opts.merge(credentials)
end
|
2014-09-24 16:52:09 -04:00
|
|
|
|
2019-01-31 23:40:48 -05:00
|
|
|
# Downloads the object for +filename+ to +destination_path+.
#
# @param filename [String] resolved through #object
# @param destination_path [String] where the file is written
# @param failure_message [#to_s, nil] message to raise on failure; a default
#   message naming the file and the error is used when nil
# @raise [RuntimeError] when the download fails for any StandardError
def download_file(filename, destination_path, failure_message = nil)
  begin
    object(filename).download_file(destination_path)
  rescue => err
    message = failure_message&.to_s
    message ||=
      "Failed to download #{filename} because #{err.message.length > 0 ? err.message : err.class.to_s}"
    raise message
  end
end
|
|
|
|
|
|
|
|
# Returns the S3 client, building one from @s3_options on first use unless
# a client is already set.
def s3_client
  return @s3_client if @s3_client

  @s3_client = Aws::S3::Client.new(@s3_options)
end
|
|
|
|
|
|
|
|
# Returns the inventory path ("inventory" by default), resolved with
# get_path_for_s3_upload.
def s3_inventory_path(path = "inventory")
  resolved_path = get_path_for_s3_upload(path)
  resolved_path
end
|
|
|
|
|
FEATURE: Direct S3 multipart uploads for backups (#14736)
This PR introduces a new `enable_experimental_backup_uploads` site setting (default false and hidden), which when enabled alongside `enable_direct_s3_uploads` will allow for direct S3 multipart uploads of backup .tar.gz files.
To make multipart external uploads work with both the S3BackupStore and the S3Store, I've had to move several methods out of S3Store and into S3Helper, including:
* presigned_url
* create_multipart
* abort_multipart
* complete_multipart
* presign_multipart_part
* list_multipart_parts
Then, S3Store and S3BackupStore either delegate directly to S3Helper or have their own special methods to call S3Helper for these methods. FileStore.temporary_upload_path has also removed its dependence on upload_path, and can now be used interchangeably between the stores. A similar change was made in the frontend as well, moving the multipart related JS code out of ComposerUppyUpload and into a mixin of its own, so it can also be used by UppyUploadMixin.
Some changes to ExternalUploadManager had to be made here as well. The backup direct uploads do not need an Upload record made for them in the database, so they can be moved to their final S3 resting place when completing the multipart upload.
This changeset is not perfect; it introduces some special cases in UploadController to handle backups that was previously in BackupController, because UploadController is where the multipart routes are located. A subsequent pull request will pull these routes into a module or some other sharing pattern, along with hooks, so the backup controller and the upload controller (and any future controllers that may need them) can include these routes in a nicer way.
2021-11-10 17:25:31 -05:00
|
|
|
# Aborts the multipart upload identified by +upload_id+ for +key+.
def abort_multipart(key:, upload_id:)
  request = { bucket: s3_bucket_name, key: key, upload_id: upload_id }
  s3_client.abort_multipart_upload(request)
end
|
|
|
|
|
|
|
|
# Starts a multipart upload for +key+.
#
# The ACL is "private" when SiteSetting.s3_use_acls is on and nil otherwise.
#
# @param key [String] object key, used as given
# @param content_type [String]
# @param metadata [Hash] object metadata
# @return [Hash] { upload_id:, key: }
def create_multipart(key, content_type, metadata: {})
  acl = SiteSetting.s3_use_acls ? "private" : nil

  response =
    s3_client.create_multipart_upload(
      acl: acl,
      bucket: s3_bucket_name,
      key: key,
      content_type: content_type,
      metadata: metadata,
    )

  { upload_id: response.upload_id, key: key }
end
|
|
|
|
|
|
|
|
# Returns a presigned upload_part URL for one part of a multipart upload.
# The URL expires after UPLOAD_URL_EXPIRES_AFTER_SECONDS.
def presign_multipart_part(upload_id:, key:, part_number:)
  part_options = { part_number: part_number, upload_id: upload_id }

  presigned_url(
    key,
    method: :upload_part,
    expires_in: S3Helper::UPLOAD_URL_EXPIRES_AFTER_SECONDS,
    opts: part_options,
  )
end
|
|
|
|
|
|
|
|
# Important note from the S3 documentation:
|
|
|
|
#
|
|
|
|
# This request returns a default and maximum of 1000 parts.
|
|
|
|
# You can restrict the number of parts returned by specifying the
|
|
|
|
# max_parts argument. If your multipart upload consists of more than 1,000
|
|
|
|
# parts, the response returns an IsTruncated field with the value of true,
|
|
|
|
# and a NextPartNumberMarker element.
|
|
|
|
#
|
|
|
|
# In subsequent ListParts requests you can include the part_number_marker arg
|
|
|
|
# using the NextPartNumberMarker field value from the previous response to
|
|
|
|
# get more parts.
|
|
|
|
#
|
|
|
|
# See https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#list_parts-instance_method
|
|
|
|
def list_multipart_parts(upload_id:, key:, max_parts: 1000, start_from_part_number: nil)
  request = { bucket: s3_bucket_name, key: key, upload_id: upload_id, max_parts: max_parts }

  # Only send a marker when the caller asked to start from a later part.
  request[:part_number_marker] = start_from_part_number if start_from_part_number.present?

  s3_client.list_parts(request)
end
|
|
|
|
|
|
|
|
# Completes the multipart upload +upload_id+ for +key+ from the given parts.
#
# @param parts [Array] passed to S3 unchanged as multipart_upload[:parts]
def complete_multipart(upload_id:, key:, parts:)
  request = {
    bucket: s3_bucket_name,
    key: key,
    upload_id: upload_id,
    multipart_upload: {
      parts: parts,
    },
  }

  s3_client.complete_multipart_upload(request)
end
|
|
|
|
|
|
|
|
# Returns a presigned URL for +method+ on +key+.
#
# @param key [String] object key, used as given
# @param method [Symbol] S3 operation to presign, e.g. :upload_part
# @param expires_in [Integer] lifetime of the URL in seconds
# @param opts [Hash] extra presign parameters; they win over the defaults
def presigned_url(key, method:, expires_in: S3Helper::UPLOAD_URL_EXPIRES_AFTER_SECONDS, opts: {})
  signer = Aws::S3::Presigner.new(client: s3_client)

  defaults = {
    bucket: s3_bucket_name,
    key: key,
    expires_in: expires_in,
    use_accelerate_endpoint: @s3_options[:use_accelerate_endpoint],
  }

  signer.presigned_url(method, defaults.merge(opts))
end
|
|
|
|
|
2023-08-22 21:18:33 -04:00
|
|
|
# Returns url, headers in a tuple which is needed in some cases.
|
|
|
|
# Same parameters as presigned_url, but calls the presigner's
# presigned_request and returns its result (url and headers).
def presigned_request(
  key,
  method:,
  expires_in: S3Helper::UPLOAD_URL_EXPIRES_AFTER_SECONDS,
  opts: {}
)
  signer = Aws::S3::Presigner.new(client: s3_client)

  defaults = {
    bucket: s3_bucket_name,
    key: key,
    expires_in: expires_in,
    use_accelerate_endpoint: @s3_options[:use_accelerate_endpoint],
  }

  signer.presigned_request(method, defaults.merge(opts))
end
|
|
|
|
|
2017-10-06 01:20:01 -04:00
|
|
|
private
|
2014-09-24 16:52:09 -04:00
|
|
|
|
2021-11-07 18:16:38 -05:00
|
|
|
# Returns the bucket's current CORS rules as an array of hashes.
# The array is empty when the bucket has no CORS configuration.
def fetch_bucket_cors_rules
  response = s3_resource.client.get_bucket_cors(bucket: @s3_bucket_name)
  response.cors_rules&.map(&:to_h) || []
rescue Aws::S3::Errors::NoSuchCORSConfiguration
  # no rule
  []
end
|
|
|
|
|
2017-10-06 01:20:01 -04:00
|
|
|
# Picks the default S3 client options.
#
# Uses the site settings when S3 uploads are enabled there (after checking
# that the credentials are present), else the global settings when they
# use S3, else an empty hash.
#
# @raise [SettingMissing] from check_missing_site_options
def default_s3_options
  if SiteSetting.enable_s3_uploads?
    site_options = self.class.s3_options(SiteSetting)
    check_missing_site_options
    return site_options
  end

  return self.class.s3_options(GlobalSetting) if GlobalSetting.use_s3?

  {}
end
|
2014-09-24 16:52:09 -04:00
|
|
|
|
2017-10-06 01:20:01 -04:00
|
|
|
# Prefixes +path+ with the bucket folder path when one is configured.
#
# The path is returned unchanged when there is no folder path, when it
# already starts with the folder path, or when it starts with the temporary
# upload prefix followed by the folder path.
def get_path_for_s3_upload(path)
  folder = @s3_bucket_folder_path
  return path unless folder
  return path if path.start_with?(folder)

  temporary_folder = File.join(FileStore::BaseStore::TEMPORARY_UPLOAD_PREFIX, folder)
  return path if path.start_with?(temporary_folder)

  File.join(folder, path)
end
|
|
|
|
|
2018-12-19 00:32:32 -05:00
|
|
|
# Returns "uploads/<current db>/" for the current multisite connection.
# In the test environment a "test_<TEST_ENV_NUMBER or 0>/" segment is appended.
def multisite_upload_path
  db_path = File.join("uploads", RailsMultisite::ConnectionManagement.current_db, "/")

  if Rails.env.test?
    File.join(db_path, "test_#{ENV["TEST_ENV_NUMBER"].presence || "0"}", "/")
  else
    db_path
  end
end
|
|
|
|
|
2016-08-15 10:04:24 -04:00
|
|
|
# Builds a new Aws::S3::Resource around the client on every call.
def s3_resource
  client = s3_client
  Aws::S3::Resource.new(client: client)
end
|
2014-09-24 16:52:09 -04:00
|
|
|
|
2016-08-15 04:06:29 -04:00
|
|
|
# Returns the bucket resource, memoized after the first call.
# The bucket is created if it does not exist yet.
def s3_bucket
  return @s3_bucket if @s3_bucket

  bucket = s3_resource.bucket(@s3_bucket_name)
  bucket.create unless bucket.exists?
  @s3_bucket = bucket
end
|
2016-08-17 04:16:00 -04:00
|
|
|
|
2017-10-06 01:20:01 -04:00
|
|
|
# Ensures the site settings hold an access key pair, unless an IAM profile
# is used instead.
#
# @raise [SettingMissing] naming the first blank credential setting
def check_missing_site_options
  return if SiteSetting.s3_use_iam_profile

  raise SettingMissing.new("access_key_id") if SiteSetting.s3_access_key_id.blank?
  raise SettingMissing.new("secret_access_key") if SiteSetting.s3_secret_access_key.blank?
end
|
2014-09-24 16:52:09 -04:00
|
|
|
end
|