2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2013-10-14 08:27:41 -04:00
|
|
|
module Jobs
  # Scheduled job that deletes upload records which are no longer needed.
  #
  # Every run destroys uploads from `Upload.by_users` that have a blank URL,
  # are past their `retain_hours` (if any) and are older than the grace period.
  #
  # When the `clean_up_uploads` site setting is enabled, the job additionally
  # removes uploads that have no row in `upload_references`. That second pass
  # is throttled using a timestamp of the last completed pass stored in Redis.
  class CleanUpUploads < ::Jobs::Scheduled
    every 1.hour

    # Runs the clean-up.
    #
    # @param args [Object] job arguments; not read by this job
    # @return [Object] unspecified; callers should not rely on the return value
    def execute(args)
      # The grace period is expressed in hours and is never shorter than one hour.
      grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max

      # always remove invalid upload records
      Upload
        .by_users
        .where(
          "retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours",
        )
        .where("created_at < ?", grace_period.hour.ago)
        .where(url: "")
        .find_each(&:destroy!)

      return unless SiteSetting.clean_up_uploads?

      # Skip the orphan pass if the previous one finished less than half a
      # grace period ago. Integer division means a one-hour grace period
      # yields a zero-length window, i.e. no throttling.
      previous_run = last_cleanup
      return if previous_run && (Time.zone.now.to_i - previous_run) < (grace_period / 2).hours

      candidates = Upload.by_users

      # Each registered callback receives the current relation and returns the
      # relation to use from then on.
      Upload.unused_callbacks&.each { |handler| candidates = handler.call(candidates) }

      candidates =
        candidates
          .where(
            "uploads.retain_hours IS NULL OR uploads.created_at < current_timestamp - interval '1 hour' * uploads.retain_hours",
          )
          .where("uploads.created_at < ?", grace_period.hour.ago)
          .where("uploads.access_control_post_id IS NULL")
          .joins("LEFT JOIN upload_references ON upload_references.upload_id = uploads.id")
          .where("upload_references.upload_id IS NULL")
          .with_no_non_post_relations

      candidates.find_each do |upload|
        # Any registered callback can veto the removal of a single upload.
        next if Upload.in_use_callbacks&.any? { |callback| callback.call(upload) }

        if upload.sha1.present?
          next if referenced_in_raw_content?(upload.sha1)

          upload.destroy
        else
          # Records without a SHA1 are removed with `delete` rather than `destroy`.
          upload.delete
        end
      end

      ExternalUploadStub.cleanup!

      self.last_cleanup = Time.zone.now.to_i
    end

    # Stores the time of the last orphan clean-up in Redis.
    # The key expires after 7 days.
    #
    # @param v [#to_s] value to store; `execute` passes a Unix timestamp in seconds
    def last_cleanup=(v)
      Discourse.redis.setex(last_cleanup_key, 7.days.to_i, v.to_s)
    end

    # Reads the stored clean-up time from Redis.
    #
    # @return [Integer, nil] the stored value converted with `to_i`, or nil
    #   when the key is absent
    def last_cleanup
      Discourse.redis.get(last_cleanup_key)&.to_i
    end

    # Deletes the stored clean-up time so the next run is not throttled.
    def reset_last_cleanup!
      Discourse.redis.del(last_cleanup_key)
    end

    protected

    # @return [String] Redis key under which the last clean-up time is stored
    def last_cleanup_key
      "LAST_UPLOAD_CLEANUP"
    end

    # Tells whether the upload's SHA1, in hex or Base62 form, appears in the
    # raw text of a pending queued post, in any draft, or in any user bio.
    # The checks run in that order and stop at the first match.
    #
    # TODO: Remove this check after UploadReferences records were created
    #
    # @param sha1 [String] hexadecimal SHA1 of the upload
    # @return [Boolean]
    def referenced_in_raw_content?(sha1)
      encoded_sha = Base62.encode(sha1.hex)
      patterns = ["%#{sha1}%", "%#{encoded_sha}%"]

      ReviewableQueuedPost
        .pending
        .where("payload->>'raw' LIKE ? OR payload->>'raw' LIKE ?", *patterns)
        .exists? || Draft.where("data LIKE ? OR data LIKE ?", *patterns).exists? ||
        UserProfile.where("bio_raw LIKE ? OR bio_raw LIKE ?", *patterns).exists?
    end
  end
end
|