REFACTOR: Restoring of backups and migration of uploads to S3

This commit is contained in:
Gerhard Schlager 2020-01-13 00:12:27 +01:00
parent f10078eab4
commit e474cda321
37 changed files with 2454 additions and 1029 deletions

View File

@ -1,5 +1,5 @@
name: CI name: CI
on: on:
push: push:
branches: branches:
@ -7,7 +7,7 @@ on:
pull_request: pull_request:
branches-ignore: branches-ignore:
- 'tests-passed' - 'tests-passed'
jobs: jobs:
build: build:
name: "${{ matrix.target }}-${{ matrix.build_types }}" name: "${{ matrix.target }}-${{ matrix.build_types }}"
@ -38,7 +38,7 @@ jobs:
services: services:
postgres: postgres:
image: postgres:${{ matrix.postgres }} image: postgres:${{ matrix.postgres }}
ports: ports:
- 5432:5432 - 5432:5432
env: env:
POSTGRES_USER: discourse POSTGRES_USER: discourse
@ -88,14 +88,14 @@ jobs:
key: ${{ runner.os }}-gem-${{ hashFiles('**/Gemfile.lock') }} key: ${{ runner.os }}-gem-${{ hashFiles('**/Gemfile.lock') }}
restore-keys: | restore-keys: |
${{ runner.os }}-gem- ${{ runner.os }}-gem-
- name: Setup gems - name: Setup gems
run: bundle install --without development --deployment --jobs 4 --retry 3 run: bundle install --without development --deployment --jobs 4 --retry 3
- name: Get yarn cache directory - name: Get yarn cache directory
id: yarn-cache-dir id: yarn-cache-dir
run: echo "::set-output name=dir::$(yarn cache dir)" run: echo "::set-output name=dir::$(yarn cache dir)"
- name: Yarn cache - name: Yarn cache
uses: actions/cache@v1 uses: actions/cache@v1
id: yarn-cache id: yarn-cache
@ -113,7 +113,7 @@ jobs:
run: bin/rake plugin:install_all_official run: bin/rake plugin:install_all_official
- name: Create database - name: Create database
if: env.BUILD_TYPE != 'LINT' if: env.BUILD_TYPE != 'LINT'
run: bin/rake db:create && bin/rake db:migrate run: bin/rake db:create && bin/rake db:migrate
- name: Create parallel databases - name: Create parallel databases
@ -123,7 +123,7 @@ jobs:
- name: Rubocop - name: Rubocop
if: env.BUILD_TYPE == 'LINT' if: env.BUILD_TYPE == 'LINT'
run: bundle exec rubocop . run: bundle exec rubocop .
- name: ESLint - name: ESLint
if: env.BUILD_TYPE == 'LINT' if: env.BUILD_TYPE == 'LINT'
run: yarn eslint app/assets/javascripts test/javascripts && yarn eslint --ext .es6 app/assets/javascripts test/javascripts plugins run: yarn eslint app/assets/javascripts test/javascripts && yarn eslint --ext .es6 app/assets/javascripts test/javascripts plugins
@ -133,7 +133,7 @@ jobs:
run: | run: |
yarn prettier -v yarn prettier -v
yarn prettier --list-different "app/assets/stylesheets/**/*.scss" "app/assets/javascripts/**/*.es6" "test/javascripts/**/*.es6" "plugins/**/*.scss" "plugins/**/*.es6" yarn prettier --list-different "app/assets/stylesheets/**/*.scss" "app/assets/javascripts/**/*.es6" "test/javascripts/**/*.es6" "plugins/**/*.scss" "plugins/**/*.es6"
- name: Core RSpec - name: Core RSpec
if: env.BUILD_TYPE == 'BACKEND' && env.TARGET == 'CORE' if: env.BUILD_TYPE == 'BACKEND' && env.TARGET == 'CORE'
run: bin/turbo_rspec && bin/rake plugin:spec run: bin/turbo_rspec && bin/rake plugin:spec
@ -146,12 +146,12 @@ jobs:
if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE' if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE'
run: bundle exec rake qunit:test['1200000'] run: bundle exec rake qunit:test['1200000']
timeout-minutes: 30 timeout-minutes: 30
- name: Wizard QUnit - name: Wizard QUnit
if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE' if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE'
run: bundle exec rake qunit:test['1200000','/wizard/qunit'] run: bundle exec rake qunit:test['1200000','/wizard/qunit']
timeout-minutes: 30 timeout-minutes: 30
- name: Plugin QUnit # Tests core plugins in TARGET=CORE, and all plugins in TARGET=PLUGINS - name: Plugin QUnit # Tests core plugins in TARGET=CORE, and all plugins in TARGET=PLUGINS
if: env.BUILD_TYPE == 'FRONTEND' if: env.BUILD_TYPE == 'FRONTEND'
run: bundle exec rake plugin:qunit run: bundle exec rake plugin:qunit

1
.gitignore vendored
View File

@ -32,6 +32,7 @@ config/discourse.conf
# Ignore the default SQLite database and db dumps # Ignore the default SQLite database and db dumps
*.sql *.sql
*.sql.gz *.sql.gz
!/spec/fixtures/**/*.sql
/db/*.sqlite3 /db/*.sqlite3
/db/structure.sql /db/structure.sql
/db/schema.rb /db/schema.rb

View File

@ -4,10 +4,8 @@ module BackupRestore
class OperationRunningError < RuntimeError; end class OperationRunningError < RuntimeError; end
VERSION_PREFIX = "v".freeze VERSION_PREFIX = "v"
DUMP_FILE = "dump.sql.gz".freeze DUMP_FILE = "dump.sql.gz"
OLD_DUMP_FILE = "dump.sql".freeze
METADATA_FILE = "meta.json"
LOGS_CHANNEL = "/admin/backups/logs" LOGS_CHANNEL = "/admin/backups/logs"
def self.backup!(user_id, opts = {}) def self.backup!(user_id, opts = {})
@ -19,7 +17,16 @@ module BackupRestore
end end
def self.restore!(user_id, opts = {}) def self.restore!(user_id, opts = {})
start! BackupRestore::Restorer.new(user_id, opts) restorer = BackupRestore::Restorer.new(
user_id: user_id,
filename: opts[:filename],
factory: BackupRestore::Factory.new(
user_id: user_id,
client_id: opts[:client_id]
)
)
start! restorer
end end
def self.rollback! def self.rollback!
@ -75,16 +82,18 @@ module BackupRestore
end end
def self.move_tables_between_schemas(source, destination) def self.move_tables_between_schemas(source, destination)
DB.exec(move_tables_between_schemas_sql(source, destination)) ActiveRecord::Base.transaction do
DB.exec(move_tables_between_schemas_sql(source, destination))
end
end end
def self.move_tables_between_schemas_sql(source, destination) def self.move_tables_between_schemas_sql(source, destination)
<<-SQL <<~SQL
DO $$DECLARE row record; DO $$DECLARE row record;
BEGIN BEGIN
-- create <destination> schema if it does not exists already -- create <destination> schema if it does not exists already
-- NOTE: DROP & CREATE SCHEMA is easier, but we don't want to drop the public schema -- NOTE: DROP & CREATE SCHEMA is easier, but we don't want to drop the public schema
-- ortherwise extensions (like hstore & pg_trgm) won't work anymore... -- otherwise extensions (like hstore & pg_trgm) won't work anymore...
CREATE SCHEMA IF NOT EXISTS #{destination}; CREATE SCHEMA IF NOT EXISTS #{destination};
-- move all <source> tables to <destination> schema -- move all <source> tables to <destination> schema
FOR row IN SELECT tablename FROM pg_tables WHERE schemaname = '#{source}' FOR row IN SELECT tablename FROM pg_tables WHERE schemaname = '#{source}'
@ -108,11 +117,17 @@ module BackupRestore
config = ActiveRecord::Base.connection_pool.spec.config config = ActiveRecord::Base.connection_pool.spec.config
config = config.with_indifferent_access config = config.with_indifferent_access
# credentials for PostgreSQL in CI environment
if Rails.env.test?
username = ENV["PGUSER"]
password = ENV["PGPASSWORD"]
end
DatabaseConfiguration.new( DatabaseConfiguration.new(
config["backup_host"] || config["host"], config["backup_host"] || config["host"],
config["backup_port"] || config["port"], config["backup_port"] || config["port"],
config["username"] || ENV["USER"] || "postgres", config["username"] || username || ENV["USER"] || "postgres",
config["password"], config["password"] || password,
config["database"] config["database"]
) )
end end

View File

@ -0,0 +1,96 @@
# frozen_string_literal: true
module BackupRestore
  # Fetches a backup file into a fresh temporary directory and unpacks it so
  # that a plain SQL dump is available for restoring.
  #
  # Two kinds of backup files are handled:
  # * a bare gzipped dump (filename ends in ".sql.gz")
  # * anything else, which is treated as a gzipped tar archive containing the dump
  class BackupFileHandler
    # Dump filename used inside archives created by Discourse v1.5 and below.
    OLD_DUMP_FILENAME = "dump.sql"

    delegate :log, to: :@logger, private: true

    # @param logger [#log] receiver of progress messages
    # @param filename [String] name of the backup file as known to the backup store
    # @param current_db [String] name of the current database; becomes part of the tmp path
    # @param root_tmp_directory [String, Pathname] directory below which "tmp/restores/..." is created
    def initialize(logger, filename, current_db, root_tmp_directory = Rails.root)
      @logger = logger
      @filename = filename
      @current_db = current_db
      @root_tmp_directory = root_tmp_directory
      # `\z` anchors at the real end of the string (`$` would also match before a
      # trailing newline). Regexp#match? returns false for nil, so a missing
      # filename is still classified as an archive, exactly as before.
      @is_archive = !/\.sql\.gz\z/.match?(@filename)
    end

    # Copies (or downloads) the backup into a new tmp directory and unpacks it.
    #
    # @return [Array(String, String)] the tmp directory and the path of the extracted SQL dump
    def decompress
      create_tmp_directory
      @archive_path = File.join(@tmp_directory, @filename)

      copy_archive_to_tmp_directory
      decompress_archive
      extract_db_dump

      [@tmp_directory, @db_dump_path]
    end

    # Removes the tmp directory created by #decompress. Best effort: failures
    # are logged and never raised.
    def clean_up
      return if @tmp_directory.blank?

      log "Removing tmp '#{@tmp_directory}' directory..."
      # Dir.exist? checks the literal path. The previous `Dir[path]` treated the
      # path as a glob pattern, so a path containing characters such as "[" or
      # "{" never matched and the directory was left behind.
      FileUtils.rm_rf(@tmp_directory) if Dir.exist?(@tmp_directory)
    rescue => ex
      log "Something went wrong while removing the following tmp directory: #{@tmp_directory}", ex
    end

    protected

    # Builds "<root>/tmp/restores/<current_db>/<timestamp>" and creates it.
    def create_tmp_directory
      timestamp = Time.zone.now.strftime("%Y-%m-%d-%H%M%S")
      @tmp_directory = File.join(@root_tmp_directory, "tmp", "restores", @current_db, timestamp)
      ensure_directory_exists(@tmp_directory)
    end

    def ensure_directory_exists(directory)
      log "Making sure #{directory} exists..."
      FileUtils.mkdir_p(directory)
    end

    # Asks the configured backup store to place the backup file at @archive_path.
    # Only the log and failure messages differ between remote and local stores.
    def copy_archive_to_tmp_directory
      store = BackupRestore::BackupStore.create

      if store.remote?
        log "Downloading archive to tmp directory..."
        failure_message = "Failed to download archive to tmp directory."
      else
        log "Copying archive to tmp directory..."
        failure_message = "Failed to copy archive to tmp directory."
      end

      store.download_file(@filename, @archive_path, failure_message)
    end

    # Unpacks the tar.gz archive into the tmp directory. No-op for bare ".sql.gz" dumps.
    def decompress_archive
      return if !@is_archive

      log "Unzipping archive, this may take a while..."
      pipeline = Compression::Pipeline.new([Compression::Tar.new, Compression::Gzip.new])
      unzipped_path = pipeline.decompress(@tmp_directory, @archive_path, available_size)
      # NOTE(review): presumably moves the unpacked content up into the tmp
      # directory itself - confirm against Compression::Pipeline.
      pipeline.strip_directory(unzipped_path, @tmp_directory)
    end

    # Locates the dump file and gunzips it when necessary.
    #
    # @return [String] path of the uncompressed SQL dump
    def extract_db_dump
      @db_dump_path =
        if @is_archive
          # for compatibility with backups from Discourse v1.5 and below
          old_dump_path = File.join(@tmp_directory, OLD_DUMP_FILENAME)
          # File.exists? is deprecated and was removed in Ruby 3.2
          File.exist?(old_dump_path) ? old_dump_path : File.join(@tmp_directory, BackupRestore::DUMP_FILE)
        else
          File.join(@tmp_directory, @filename)
        end

      if File.extname(@db_dump_path) == '.gz'
        log "Extracting dump file..."
        Compression::Gzip.new.decompress(@tmp_directory, @db_dump_path, available_size)
        # File.join returned a fresh, unfrozen string, so in-place mutation is safe
        @db_dump_path.delete_suffix!('.gz')
      end

      @db_dump_path
    end

    # Value of the `decompressed_backup_max_file_size_mb` site setting; handed
    # to the decompression helpers as their size argument.
    def available_size
      SiteSetting.decompressed_backup_max_file_size_mb
    end
  end
end

View File

@ -3,8 +3,8 @@
module BackupRestore module BackupRestore
# @abstract # @abstract
class BackupStore class BackupStore
class BackupFileExists < RuntimeError; end BackupFileExists = Class.new(RuntimeError)
class StorageError < RuntimeError; end StorageError = Class.new(RuntimeError)
# @return [BackupStore] # @return [BackupStore]
def self.create(opts = {}) def self.create(opts = {})

View File

@ -0,0 +1,182 @@
# frozen_string_literal: true
module BackupRestore
  DatabaseRestoreError = Class.new(RuntimeError)

  # Restores a SQL dump into the current database.
  #
  # The existing tables are first moved from the "public" schema into the
  # "backup" schema, then the dump is loaded with psql and the database is
  # migrated. #rollback moves the tables back when something went wrong.
  class DatabaseRestorer
    delegate :log, to: :@logger, private: true

    MAIN_SCHEMA = "public"
    BACKUP_SCHEMA = "backup"

    # @param logger [#log] receiver of progress messages
    # @param current_db [String] name of the multisite database to (re)connect to
    def initialize(logger, current_db)
      @logger = logger
      @db_was_changed = false
      @current_db = current_db
    end

    # Runs the complete database restore.
    #
    # @param db_dump_path [String] path of the uncompressed SQL dump
    # @raise [DatabaseRestoreError] when psql exits with a non-zero status
    def restore(db_dump_path)
      BackupRestore.move_tables_between_schemas(MAIN_SCHEMA, BACKUP_SCHEMA)

      @db_dump_path = db_dump_path
      # from here on a rollback has something to undo
      @db_was_changed = true

      create_missing_discourse_functions
      restore_dump
      migrate_database
      reconnect_database
    end

    # Moves the tables from the backup schema back into the main schema, but
    # only when #restore already touched the database and a rollback is possible.
    def rollback
      log "Trying to rollback..."

      if @db_was_changed && BackupRestore.can_rollback?
        log "Rolling back..."
        BackupRestore.move_tables_between_schemas(BACKUP_SCHEMA, MAIN_SCHEMA)
      else
        log "There was no need to rollback"
      end
    end

    # Drops the helper functions that were created for this restore.
    def clean_up
      drop_created_discourse_functions
    end

    protected

    # Pipes the dump through sed into psql and forwards every line of psql's
    # output to the logger from a separate thread.
    #
    # @raise [DatabaseRestoreError] when the pipeline exits with a non-zero status
    def restore_dump
      log "Restoring dump file... (this may take a while)"

      logs = Queue.new
      last_line = nil

      log_thread = Thread.new do
        RailsMultisite::ConnectionManagement::establish_connection(db: @current_db)

        # Queue#pop returns nil only after the queue has been closed AND
        # drained, so every line psql printed gets logged. The previous
        # `while psql_running` check could leave the loop while lines were
        # still queued, silently dropping the tail of the output.
        while (message = logs.pop)
          message = message.strip
          log(message) if message.present?
        end
      end

      begin
        IO.popen(restore_dump_command) do |pipe|
          pipe.each_line do |line|
            logs << line
            last_line = line
          end
        end
      ensure
        # always release the logging thread, even when reading the pipe failed
        logs.close
      end

      log_thread.join

      # IO.popen with a block stores the exit status of the shell pipeline in $?
      raise DatabaseRestoreError.new("psql failed: #{last_line}") if Process.last_status&.exitstatus != 0
    end

    # Removes unwanted SQL added by certain versions of pg_dump.
    def sed_command
      unwanted_sql = [
        "DROP SCHEMA",                    # Discourse <= v1.5
        "CREATE SCHEMA",                  # PostgreSQL 11+
        "COMMENT ON SCHEMA",              # PostgreSQL 11+
        "SET default_table_access_method" # PostgreSQL 12
      ].join("|")

      "sed -E '/^(#{unwanted_sql})/d'"
    end

    # Full shell pipeline: filtered dump -> psql, with stderr merged into stdout
    # so that error messages end up in the restore log.
    # NOTE(review): the dump path is interpolated without shell escaping.
    def restore_dump_command
      "#{sed_command} #{@db_dump_path} | #{psql_command} 2>&1"
    end

    # Builds the psql invocation from the backup database configuration.
    # Optional parts are nil when not configured and removed by #compact.
    # NOTE(review): password and database name are only wrapped in single
    # quotes; a value containing a single quote would break the command.
    def psql_command
      db_conf = BackupRestore.database_configuration

      password_argument = "PGPASSWORD='#{db_conf.password}'" if db_conf.password.present?
      host_argument     = "--host=#{db_conf.host}"         if db_conf.host.present?
      port_argument     = "--port=#{db_conf.port}"         if db_conf.port.present?
      username_argument = "--username=#{db_conf.username}" if db_conf.username.present?

      [ password_argument,                # pass the password to psql (if any)
        "psql",                           # the psql command
        "--dbname='#{db_conf.database}'", # connect to database *dbname*
        "--single-transaction",           # all or nothing (also runs COPY commands faster)
        "--variable=ON_ERROR_STOP=1",     # stop on first error
        host_argument,                    # the hostname to connect to (if any)
        port_argument,                    # the port to connect to (if any)
        username_argument                 # the username to connect as (if any)
      ].compact.join(" ")
    end

    # Runs "rake db:migrate" in a child process (with
    # SKIP_POST_DEPLOYMENT_MIGRATIONS set to "0") and logs its output.
    def migrate_database
      log "Migrating the database..."

      log Discourse::Utils.execute_command(
        { "SKIP_POST_DEPLOYMENT_MIGRATIONS" => "0" },
        "rake db:migrate",
        failure_message: "Failed to migrate database.",
        chdir: Rails.root
      )
    end

    def reconnect_database
      log "Reconnecting to the database..."
      RailsMultisite::ConnectionManagement::reload if RailsMultisite::ConnectionManagement::instance
      RailsMultisite::ConnectionManagement::establish_connection(db: @current_db)
    end

    # Collects every table and column listed in the DROPPED_TABLES and
    # DROPPED_COLUMNS constants of the post-deployment migrations and creates
    # the matching readonly function when it does not exist yet. Created
    # functions are remembered so that #clean_up can drop them again.
    def create_missing_discourse_functions
      log "Creating missing functions in the discourse_functions schema..."

      @created_functions_for_table_columns = []
      all_readonly_table_columns = []

      Dir[Rails.root.join(Migration::SafeMigrate.post_migration_path, "**/*.rb")].each do |path|
        require path
        # "20190101000000_drop_foo.rb" -> "DropFoo"
        class_name = File.basename(path, ".rb").sub(/^\d+_/, "").camelize
        migration_class = class_name.constantize

        if migration_class.const_defined?(:DROPPED_TABLES)
          migration_class::DROPPED_TABLES.each do |table_name|
            # single-element entry: column_name is nil when destructured below
            all_readonly_table_columns << [table_name]
          end
        end

        if migration_class.const_defined?(:DROPPED_COLUMNS)
          migration_class::DROPPED_COLUMNS.each do |table_name, column_names|
            column_names.each do |column_name|
              all_readonly_table_columns << [table_name, column_name]
            end
          end
        end
      end

      existing_function_names = Migration::BaseDropper.existing_discourse_function_names.map { |name| "#{name}()" }

      all_readonly_table_columns.each do |table_name, column_name|
        function_name = Migration::BaseDropper.readonly_function_name(table_name, column_name, with_schema: false)

        if !existing_function_names.include?(function_name)
          Migration::BaseDropper.create_readonly_function(table_name, column_name)
          @created_functions_for_table_columns << [table_name, column_name]
        end
      end
    end

    # Best effort: failures are logged and never raised.
    def drop_created_discourse_functions
      return if @created_functions_for_table_columns.blank?

      log "Dropping functions from the discourse_functions schema..."
      @created_functions_for_table_columns.each do |table_name, column_name|
        Migration::BaseDropper.drop_readonly_function(table_name, column_name)
      end
    rescue => ex
      log "Something went wrong while dropping functions from the discourse_functions schema", ex
    end
  end
end

View File

@ -0,0 +1,34 @@
# frozen_string_literal: true
module BackupRestore
  # Creates the collaborators used during a restore and hands each of them the
  # same logger instance.
  class Factory
    # @param user_id [Integer, nil] passed on to the logger
    # @param client_id [String, nil] passed on to the logger
    def initialize(user_id: nil, client_id: nil)
      @user_id = user_id
      @client_id = client_id
    end

    # @return [BackupRestore::Logger] the memoized logger shared by all created objects
    def logger
      # Fully qualified on purpose: a bare `Logger` can resolve to Ruby's
      # standard library ::Logger when BackupRestore::Logger has not been
      # loaded yet and constants are autoloaded via const_missing.
      @logger ||= BackupRestore::Logger.new(user_id: @user_id, client_id: @client_id)
    end

    def create_system_interface
      SystemInterface.new(logger)
    end

    def create_uploads_restorer
      UploadsRestorer.new(logger)
    end

    # @param current_db [String] name of the current database
    def create_database_restorer(current_db)
      DatabaseRestorer.new(logger, current_db)
    end

    # @param filename [String] name of the backup file
    # @param tmp_directory [String, nil] directory the backup was extracted into
    def create_meta_data_handler(filename, tmp_directory)
      MetaDataHandler.new(logger, filename, tmp_directory)
    end

    # @param filename [String] name of the backup file
    # @param current_db [String] name of the current database
    def create_backup_file_handler(filename, current_db)
      BackupFileHandler.new(logger, filename, current_db)
    end
  end
end

View File

@ -0,0 +1,37 @@
# frozen_string_literal: true
module BackupRestore
  # Collects the log messages of a restore. Every message is printed to
  # stdout, kept in #logs and, when both a user and a client are known,
  # published on the backup logs MessageBus channel.
  class Logger
    # @return [Array<String>] all logged messages, each prefixed with "[timestamp] "
    attr_reader :logs

    # @param user_id [Integer, nil] the only user the messages are published to
    # @param client_id [String, nil] the only MessageBus client the messages are published to
    def initialize(user_id: nil, client_id: nil)
      @user_id = user_id
      @client_id = client_id
      # publishing needs both ids because the message is addressed to exactly one user and client
      @publish_to_message_bus = @user_id.present? && @client_id.present?
      @logs = []
    end

    # Logs a message. Does nothing at all in the test environment.
    #
    # @param message [String] text to log
    # @param ex [Exception, nil] optional exception; it is written to the Rails error log only
    def log(message, ex = nil)
      return if Rails.env.test?

      timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S")
      puts(message)
      publish_log(message, timestamp)
      save_log(message, timestamp)
      # Exception#backtrace is nil for an exception that was created but never
      # raised; Array() turns that into an empty list instead of failing.
      Rails.logger.error("#{ex}\n#{Array(ex.backtrace).join("\n")}") if ex
    end

    protected

    def publish_log(message, timestamp)
      return unless @publish_to_message_bus
      data = { timestamp: timestamp, operation: "restore", message: message }
      MessageBus.publish(BackupRestore::LOGS_CHANNEL, data, user_ids: [@user_id], client_ids: [@client_id])
    end

    def save_log(message, timestamp)
      @logs << "[#{timestamp}] #{message}"
    end
  end
end

View File

@ -0,0 +1,60 @@
# frozen_string_literal: true
module BackupRestore
  MetaDataError = Class.new(RuntimeError)
  MigrationRequiredError = Class.new(RuntimeError)

  # Determines the schema version of a backup and makes sure that it isn't
  # newer than the schema version of the running application.
  class MetaDataHandler
    METADATA_FILE = "meta.json"

    delegate :log, to: :@logger, private: true

    # @param logger [#log] receiver of progress messages
    # @param filename [String] name of the backup file
    # @param tmp_directory [String, nil] directory the backup was extracted into
    def initialize(logger, filename, tmp_directory)
      @logger = logger
      @current_version = BackupRestore.current_version
      @filename = filename
      @tmp_directory = tmp_directory
    end

    # @return [Hash] the metadata of the backup; `:version` holds the schema version
    # @raise [MetaDataError] when no usable version can be determined
    # @raise [MigrationRequiredError] when the backup is newer than the current schema
    def validate
      metadata = extract_metadata

      log "Validating metadata..."
      log " Current version: #{@current_version}"
      log " Restored version: #{metadata[:version]}"

      # A metadata file without a numeric version used to surface as a
      # NoMethodError/ArgumentError from the comparison below.
      if !metadata[:version].is_a?(Integer)
        raise MetaDataError.new("Migration version is missing from the metadata file.")
      end

      if metadata[:version] > @current_version
        raise MigrationRequiredError.new("You're trying to restore a more recent version of the schema. " \
          "You should migrate first!")
      end

      metadata
    end

    protected

    # Tries to extract the backup version from an existing
    # metadata file (used in Discourse < v1.6) or from the filename.
    def extract_metadata
      metadata_path = File.join(@tmp_directory, METADATA_FILE) if @tmp_directory.present?

      # File.exists? is deprecated and was removed in Ruby 3.2
      if metadata_path.present? && File.exist?(metadata_path)
        metadata = load_metadata_file(metadata_path)
      elsif @filename =~ /-#{BackupRestore::VERSION_PREFIX}(\d{14})/
        # the 14 digits after the version prefix are the migration version
        metadata = { version: Regexp.last_match[1].to_i }
      else
        raise MetaDataError.new("Migration version is missing from the filename.")
      end

      metadata
    end

    # @param path [String] path of the JSON metadata file
    # @return [Hash] parsed metadata with symbolized keys
    # @raise [MetaDataError] when the file is empty, isn't a JSON object or can't be parsed
    def load_metadata_file(path)
      metadata = Oj.load_file(path, symbol_keys: true)
      # anything but a non-empty JSON object can't carry a version
      raise MetaDataError.new("Failed to load metadata file.") if metadata.blank? || !metadata.is_a?(Hash)
      metadata
    rescue Oj::ParseError
      raise MetaDataError.new("Failed to parse metadata file.")
    end
  end
end

View File

@ -1,107 +1,74 @@
# frozen_string_literal: true # frozen_string_literal: true
require_dependency "db_helper"
module BackupRestore module BackupRestore
RestoreDisabledError = Class.new(RuntimeError)
class RestoreDisabledError < RuntimeError; end FilenameMissingError = Class.new(RuntimeError)
class FilenameMissingError < RuntimeError; end
class Restorer class Restorer
delegate :log, to: :@logger, private: true
attr_reader :success attr_reader :success
def self.pg_produces_portable_dump?(version) def initialize(user_id:, filename:, factory:, disable_emails: true)
# anything pg 11 or above will produce a non-portable dump
return false if version.to_i >= 11
# below 11, the behaviour was changed in multiple different minor
# versions depending on major release line - we list those versions below
gem_version = Gem::Version.new(version)
%w{
10.3
9.6.8
9.5.12
9.4.17
9.3.22
}.each do |unportable_version|
return false if Gem::Dependency.new("", "~> #{unportable_version}").match?("", gem_version)
end
true
end
def initialize(user_id, opts = {})
@user_id = user_id @user_id = user_id
@client_id = opts[:client_id] @filename = filename
@filename = opts[:filename] @factory = factory
@publish_to_message_bus = opts[:publish_to_message_bus] || false @logger = factory.logger
@disable_emails = opts.fetch(:disable_emails, true) @disable_emails = disable_emails
ensure_restore_is_enabled ensure_restore_is_enabled
ensure_no_operation_is_running
ensure_we_have_a_user ensure_we_have_a_user
ensure_we_have_a_filename ensure_we_have_a_filename
initialize_state @success = false
@current_db = RailsMultisite::ConnectionManagement.current_db
@system = factory.create_system_interface
@backup_file_handler = factory.create_backup_file_handler(@filename, @current_db)
@database_restorer = factory.create_database_restorer(@current_db)
@uploads_restorer = factory.create_uploads_restorer
end end
def run def run
log "[STARTED]" log "[STARTED]"
log "'#{@user_info[:username]}' has started the restore!" log "'#{@user_info[:username]}' has started the restore!"
mark_restore_as_running # FIXME not atomic!
ensure_no_operation_is_running
@system.mark_restore_as_running
listen_for_shutdown_signal @system.listen_for_shutdown_signal
ensure_directory_exists(@tmp_directory) @tmp_directory, db_dump_path = @backup_file_handler.decompress
validate_backup_metadata
copy_archive_to_tmp_directory @system.enable_readonly_mode
decompress_archive @system.pause_sidekiq
@system.wait_for_sidekiq
extract_metadata @database_restorer.restore(db_dump_path)
validate_metadata
extract_dump
create_missing_discourse_functions
if !can_restore_into_different_schema?
log "Cannot restore into different schema, restoring in-place"
enable_readonly_mode
pause_sidekiq
wait_for_sidekiq
BackupRestore.move_tables_between_schemas("public", "backup")
@db_was_changed = true
restore_dump
else
log "Restoring into 'backup' schema"
restore_dump
enable_readonly_mode
pause_sidekiq
wait_for_sidekiq
switch_schema!
end
migrate_database
reconnect_database
reload_site_settings reload_site_settings
@system.disable_readonly_mode
clear_emoji_cache clear_emoji_cache
disable_readonly_mode
clear_theme_cache clear_theme_cache
extract_uploads @uploads_restorer.restore(@tmp_directory)
after_restore_hook after_restore_hook
rescue Compression::Strategy::ExtractFailed rescue Compression::Strategy::ExtractFailed
log "The uncompressed file is too big. Consider increasing the decompressed_theme_max_file_size_mb hidden setting." log 'ERROR: The uncompressed file is too big. Consider increasing the hidden ' \
rollback '"decompressed_backup_max_file_size_mb" setting.'
@database_restorer.rollback
rescue SystemExit rescue SystemExit
log "Restore process was cancelled!" log "Restore process was cancelled!"
rollback @database_restorer.rollback
rescue => ex rescue => ex
log "EXCEPTION: " + ex.message log "EXCEPTION: " + ex.message
log ex.backtrace.join("\n") log ex.backtrace.join("\n")
rollback @database_restorer.rollback
else else
@success = true @success = true
ensure ensure
@ -112,78 +79,11 @@ module BackupRestore
@success ? log("[SUCCESS]") : log("[FAILED]") @success ? log("[SUCCESS]") : log("[FAILED]")
end end
### The methods listed below are public just for testing purposes.
### This is not a good practice, but we need to be sure that our new compression API will work.
attr_reader :tmp_directory
def ensure_directory_exists(directory)
log "Making sure #{directory} exists..."
FileUtils.mkdir_p(directory)
end
def copy_archive_to_tmp_directory
if @store.remote?
log "Downloading archive to tmp directory..."
failure_message = "Failed to download archive to tmp directory."
else
log "Copying archive to tmp directory..."
failure_message = "Failed to copy archive to tmp directory."
end
@store.download_file(@filename, @archive_filename, failure_message)
end
def decompress_archive
return unless @is_archive
log "Unzipping archive, this may take a while..."
pipeline = Compression::Pipeline.new([Compression::Tar.new, Compression::Gzip.new])
unzipped_path = pipeline.decompress(@tmp_directory, @archive_filename, available_size)
pipeline.strip_directory(unzipped_path, @tmp_directory)
end
def extract_metadata
metadata_path = File.join(@tmp_directory, BackupRestore::METADATA_FILE)
@metadata = if File.exists?(metadata_path)
data = Oj.load_file(@meta_filename)
raise "Failed to load metadata file." if !data
data
else
log "No metadata file to extract."
if @filename =~ /-#{BackupRestore::VERSION_PREFIX}(\d{14})/
{ "version" => Regexp.last_match[1].to_i }
else
raise "Migration version is missing from the filename."
end
end
end
def extract_dump
@dump_filename =
if @is_archive
# For backwards compatibility
old_dump_path = File.join(@tmp_directory, BackupRestore::OLD_DUMP_FILE)
File.exists?(old_dump_path) ? old_dump_path : File.join(@tmp_directory, BackupRestore::DUMP_FILE)
else
File.join(@tmp_directory, @filename)
end
log "Extracting dump file..."
Compression::Gzip.new.decompress(@tmp_directory, @dump_filename, available_size)
end
protected protected
def available_size
SiteSetting.decompressed_backup_max_file_size_mb
end
def ensure_restore_is_enabled def ensure_restore_is_enabled
raise BackupRestore::RestoreDisabledError unless Rails.env.development? || SiteSetting.allow_restore? return if Rails.env.development? || SiteSetting.allow_restore?
raise BackupRestore::RestoreDisabledError
end end
def ensure_no_operation_is_running def ensure_no_operation_is_running
@ -192,7 +92,8 @@ module BackupRestore
def ensure_we_have_a_user def ensure_we_have_a_user
user = User.find_by(id: @user_id) user = User.find_by(id: @user_id)
raise Discourse::InvalidParameters.new(:user_id) unless user raise Discourse::InvalidParameters.new(:user_id) if user.blank?
# keep some user data around to check them against the newly restored database # keep some user data around to check them against the newly restored database
@user_info = { id: user.id, username: user.username, email: user.email } @user_info = { id: user.id, username: user.username, email: user.email }
end end
@ -201,211 +102,8 @@ module BackupRestore
raise BackupRestore::FilenameMissingError if @filename.nil? raise BackupRestore::FilenameMissingError if @filename.nil?
end end
def initialize_state def validate_backup_metadata
@success = false @factory.create_meta_data_handler(@filename, @tmp_directory).validate
@store = BackupRestore::BackupStore.create
@db_was_changed = false
@current_db = RailsMultisite::ConnectionManagement.current_db
@current_version = BackupRestore.current_version
@timestamp = Time.now.strftime("%Y-%m-%d-%H%M%S")
@tmp_directory = File.join(Rails.root, "tmp", "restores", @current_db, @timestamp)
@archive_filename = File.join(@tmp_directory, @filename)
@tar_filename = @archive_filename[0...-3]
@is_archive = !(@filename =~ /.sql.gz$/)
@logs = []
@readonly_mode_was_enabled = Discourse.readonly_mode?
@created_functions_for_table_columns = []
end
def listen_for_shutdown_signal
Thread.new do
while BackupRestore.is_operation_running?
exit if BackupRestore.should_shutdown?
sleep 0.1
end
end
end
def mark_restore_as_running
log "Marking restore as running..."
BackupRestore.mark_as_running!
end
def enable_readonly_mode
return if @readonly_mode_was_enabled
log "Enabling readonly mode..."
Discourse.enable_readonly_mode
end
def pause_sidekiq
log "Pausing sidekiq..."
Sidekiq.pause!
end
def wait_for_sidekiq
log "Waiting for sidekiq to finish running jobs..."
iterations = 1
while sidekiq_has_running_jobs?
log "Waiting for sidekiq to finish running jobs... ##{iterations}"
sleep 5
iterations += 1
raise "Sidekiq did not finish running all the jobs in the allowed time!" if iterations > 6
end
end
def sidekiq_has_running_jobs?
Sidekiq::Workers.new.each do |_, _, worker|
payload = worker.try(:payload)
return true if payload.try(:all_sites)
return true if payload.try(:current_site_id) == @current_db
end
false
end
def validate_metadata
log "Validating metadata..."
log " Current version: #{@current_version}"
raise "Metadata has not been extracted correctly." if !@metadata
log " Restored version: #{@metadata["version"]}"
error = "You're trying to restore a more recent version of the schema. You should migrate first!"
raise error if @metadata["version"] > @current_version
end
def get_dumped_by_version
output = Discourse::Utils.execute_command(
File.extname(@dump_filename) == '.gz' ? 'zgrep' : 'grep',
'-m1', @dump_filename, '-e', "-- Dumped by pg_dump version",
failure_message: "Failed to check version of pg_dump used to generate the dump file"
)
output.match(/version (\d+(\.\d+)+)/)[1]
end
def can_restore_into_different_schema?
self.class.pg_produces_portable_dump?(get_dumped_by_version)
end
def restore_dump_command
if File.extname(@dump_filename) == '.gz'
"#{sed_command} #{@dump_filename.gsub('.gz', '')} | #{psql_command} 2>&1"
else
"#{psql_command} 2>&1 < #{@dump_filename}"
end
end
def restore_dump
log "Restoring dump file... (can be quite long)"
logs = Queue.new
psql_running = true
has_error = false
Thread.new do
RailsMultisite::ConnectionManagement::establish_connection(db: @current_db)
while psql_running
message = logs.pop.strip
has_error ||= (message =~ /ERROR:/)
log(message) unless message.blank?
end
end
IO.popen(restore_dump_command) do |pipe|
begin
while line = pipe.readline
logs << line
end
rescue EOFError
# finished reading...
ensure
psql_running = false
logs << ""
end
end
# psql does not return a valid exit code when an error happens
raise "psql failed" if has_error
end
def psql_command
db_conf = BackupRestore.database_configuration
password_argument = "PGPASSWORD='#{db_conf.password}'" if db_conf.password.present?
host_argument = "--host=#{db_conf.host}" if db_conf.host.present?
port_argument = "--port=#{db_conf.port}" if db_conf.port.present?
username_argument = "--username=#{db_conf.username}" if db_conf.username.present?
[ password_argument, # pass the password to psql (if any)
"psql", # the psql command
"--dbname='#{db_conf.database}'", # connect to database *dbname*
"--single-transaction", # all or nothing (also runs COPY commands faster)
host_argument, # the hostname to connect to (if any)
port_argument, # the port to connect to (if any)
username_argument # the username to connect as (if any)
].join(" ")
end
def sed_command
# in order to limit the downtime when restoring as much as possible
# we force the restoration to happen in the "restore" schema
# during the restoration, this make sure we
# - drop the "restore" schema if it exists
# - create the "restore" schema
# - prepend the "restore" schema into the search_path
regexp = "SET search_path = public, pg_catalog;"
replacement = [ "DROP SCHEMA IF EXISTS restore CASCADE;",
"CREATE SCHEMA restore;",
"SET search_path = restore, public, pg_catalog;",
].join(" ")
# we only want to replace the VERY first occurence of the search_path command
expression = "1,/^#{regexp}$/s/#{regexp}/#{replacement}/"
"sed -e '#{expression}'"
end
def switch_schema!
log "Switching schemas... try reloading the site in 5 minutes, if successful, then reboot and restore is complete."
sql = [
"BEGIN;",
BackupRestore.move_tables_between_schemas_sql("public", "backup"),
BackupRestore.move_tables_between_schemas_sql("restore", "public"),
"COMMIT;"
].join("\n")
@db_was_changed = true
DB.exec(sql)
end
def migrate_database
log "Migrating the database..."
if Discourse.skip_post_deployment_migrations?
ENV["SKIP_POST_DEPLOYMENT_MIGRATIONS"] = "0"
Rails.application.config.paths['db/migrate'] << Rails.root.join(
Discourse::DB_POST_MIGRATE_PATH
).to_s
end
Discourse::Application.load_tasks
ENV["VERSION"] = @current_version.to_s
DB.exec("SET search_path = public, pg_catalog;")
Rake::Task["db:migrate"].invoke
end
def reconnect_database
log "Reconnecting to the database..."
RailsMultisite::ConnectionManagement::reload if RailsMultisite::ConnectionManagement::instance
RailsMultisite::ConnectionManagement::establish_connection(db: @current_db)
end end
def reload_site_settings def reload_site_settings
@ -426,201 +124,30 @@ module BackupRestore
Emoji.clear_cache Emoji.clear_cache
end end
def extract_uploads
return unless File.exists?(File.join(@tmp_directory, 'uploads'))
log "Extracting uploads..."
public_uploads_path = File.join(Rails.root, "public")
upload_path = Discourse.store.upload_path
FileUtils.mkdir_p(File.join(public_uploads_path, "uploads"))
tmp_uploads_path = Dir.glob(File.join(@tmp_directory, "uploads", "*")).first
return if tmp_uploads_path.blank?
previous_db_name = BackupMetadata.value_for("db_name") || File.basename(tmp_uploads_path)
optimized_images_exist = File.exist?(File.join(tmp_uploads_path, 'optimized'))
Discourse::Utils.execute_command(
'rsync', '-avp', '--safe-links', "#{tmp_uploads_path}/", "#{upload_path}/",
failure_message: "Failed to restore uploads.",
chdir: public_uploads_path
)
remap_uploads(previous_db_name, upload_path)
if SiteSetting.Upload.enable_s3_uploads
migrate_to_s3
remove_local_uploads(File.join(public_uploads_path, upload_path))
end
generate_optimized_images unless optimized_images_exist
end
# Rewrites upload URLs stored in the database so that they match the current
# configuration (base URL, S3 base URL, S3 CDN URL, CDN URL and database name).
# The old values are read from the backup's metadata.
#
# previous_db_name - database name the uploads belonged to in the backup
# upload_path      - upload path of the current store, without leading slash
#
# Any error is logged and swallowed; the method never raises.
def remap_uploads(previous_db_name, upload_path)
  log "Remapping uploads..."

  was_multisite = BackupMetadata.value_for("multisite") == "t"
  uploads_folder = was_multisite ? "/" : "/#{upload_path}/"

  if (old_base_url = BackupMetadata.value_for("base_url")) && old_base_url != Discourse.base_url
    remap(old_base_url, Discourse.base_url)
  end

  current_s3_base_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_base_url : nil
  # Compare the backup's *S3* base URL with the current one. The previous code
  # compared the site's base URL (old_base_url) here, so the remap ran even when
  # the S3 base URL had not changed.
  if (old_s3_base_url = BackupMetadata.value_for("s3_base_url")) && old_s3_base_url != current_s3_base_url
    remap("#{old_s3_base_url}/", uploads_folder)
  end

  current_s3_cdn_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_cdn_url : nil
  if (old_s3_cdn_url = BackupMetadata.value_for("s3_cdn_url")) && old_s3_cdn_url != current_s3_cdn_url
    base_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_cdn_url : Discourse.base_url
    remap("#{old_s3_cdn_url}/", UrlHelper.schemaless("#{base_url}#{uploads_folder}"))

    old_host = URI.parse(old_s3_cdn_url).host
    new_host = URI.parse(base_url).host
    remap(old_host, new_host)
  end

  if (old_cdn_url = BackupMetadata.value_for("cdn_url")) && old_cdn_url != Discourse.asset_host
    base_url = Discourse.asset_host || Discourse.base_url
    remap("#{old_cdn_url}/", UrlHelper.schemaless("#{base_url}/"))

    old_host = URI.parse(old_cdn_url).host
    new_host = URI.parse(base_url).host
    remap(old_host, new_host)
  end

  current_db_name = RailsMultisite::ConnectionManagement.current_db
  if previous_db_name != current_db_name
    remap("uploads/#{previous_db_name}", upload_path)
  end

rescue => ex
  log "Something went wrong while remapping uploads.", ex
end
# Prints the replacement to stdout and asks DbHelper to remap `from` to `to`,
# leaving the "backup_metadata" table untouched.
def remap(from, to)
  puts("Remapping '#{from}' to '#{to}'")

  remap_options = { verbose: true, excluded_tables: ["backup_metadata"] }
  DbHelper.remap(from, to, **remap_options)
end
# Invokes the "uploads:migrate_to_s3" rake task after setting SKIP_FAILED (and
# MIGRATE_TO_MULTISITE on multisite installs) in the environment, then switches
# jobs back to being run later.
def migrate_to_s3
  log "Migrating uploads to S3..."

  ENV["SKIP_FAILED"] = "1"
  if Rails.configuration.multisite
    ENV["MIGRATE_TO_MULTISITE"] = "1"
  end

  migration_task = Rake::Task["uploads:migrate_to_s3"]
  migration_task.invoke

  Jobs.run_later!
end
# Deletes the given uploads directory if it exists. Errors are logged, not raised.
def remove_local_uploads(directory)
  begin
    log "Removing local uploads directory..."
    directory_found = Dir[directory].present?
    FileUtils.rm_rf(directory) if directory_found
  rescue => error
    log "Something went wrong while removing the following uploads directory: #{directory}", error
  end
end
# Empties the optimized_images table, re-creates the optimized site icons, resets
# baked_version of every post that has uploads and enqueues a
# :create_avatar_thumbnails job for each user with an uploaded avatar.
def generate_optimized_images
  log 'Optimizing site icons...'
  DB.exec("TRUNCATE TABLE optimized_images")
  SiteIconManager.ensure_optimized!

  log 'Posts will be rebaked by a background job in sidekiq. You will see missing images until that has completed.'
  log 'You can expedite the process by manually running "rake posts:rebake_uncooked_posts"'

  reset_baked_version_sql = <<~SQL
    UPDATE posts
    SET baked_version = NULL
    WHERE id IN (SELECT post_id FROM post_uploads)
  SQL
  DB.exec(reset_baked_version_sql)

  users_with_avatar = User.where("uploaded_avatar_id IS NOT NULL")
  users_with_avatar.find_each do |user|
    Jobs.enqueue(:create_avatar_thumbnails, upload_id: user.uploaded_avatar_id)
  end
end
# Moves the tables from the "backup" schema back into "public", but only when the
# database was changed by this restore and a rollback is possible.
def rollback
  log "Trying to rollback..."

  can_roll_back = @db_was_changed && BackupRestore.can_rollback?
  return log "There was no need to rollback" unless can_roll_back

  log "Rolling back..."
  BackupRestore.move_tables_between_schemas("backup", "public")
end
def notify_user def notify_user
if user = User.find_by_email(@user_info[:email]) if user = User.find_by_email(@user_info[:email])
log "Notifying '#{user.username}' of the end of the restore..." log "Notifying '#{user.username}' of the end of the restore..."
status = @success ? :restore_succeeded : :restore_failed status = @success ? :restore_succeeded : :restore_failed
SystemMessage.create_from_system_user(user, status, SystemMessage.create_from_system_user(
user, status,
logs: Discourse::Utils.pretty_logs(@logs) logs: Discourse::Utils.pretty_logs(@logs)
) )
else else
log "Could not send notification to '#{@user_info[:username]}' (#{@user_info[:email]}), because the user does not exists..." log "Could not send notification to '#{@user_info[:username]}' " \
"(#{@user_info[:email]}), because the user does not exist."
end end
rescue => ex rescue => ex
log "Something went wrong while notifying user.", ex log "Something went wrong while notifying user.", ex
end end
# Creates a readonly function for every table and column listed in the
# DROPPED_TABLES / DROPPED_COLUMNS constants of the post-deployment migrations,
# unless a function of that name already exists. Every created function is
# recorded in @created_functions_for_table_columns.
def create_missing_discourse_functions
  log "Creating missing functions in the discourse_functions schema"

  readonly_targets = []

  Dir[Rails.root.join(Discourse::DB_POST_MIGRATE_PATH, "*.rb")].each do |migration_file|
    require migration_file

    # "20190101000000_drop_foo.rb" => DropFoo
    migration = File.basename(migration_file, ".rb").sub(/^\d+_/, "").camelize.constantize

    if migration.const_defined?(:DROPPED_TABLES)
      migration::DROPPED_TABLES.each { |table_name| readonly_targets << [table_name] }
    end

    next unless migration.const_defined?(:DROPPED_COLUMNS)

    migration::DROPPED_COLUMNS.each do |table_name, column_names|
      column_names.each { |column_name| readonly_targets << [table_name, column_name] }
    end
  end

  known_function_names = Migration::BaseDropper.existing_discourse_function_names.map { |name| "#{name}()" }

  readonly_targets.each do |table_name, column_name|
    function_name = Migration::BaseDropper.readonly_function_name(table_name, column_name, with_schema: false)
    next if known_function_names.include?(function_name)

    Migration::BaseDropper.create_readonly_function(table_name, column_name)
    @created_functions_for_table_columns << [table_name, column_name]
  end
end
def clean_up def clean_up
log "Cleaning stuff up..." log "Cleaning stuff up..."
drop_created_discourse_functions @database_restorer.clean_up
remove_tmp_directory @backup_file_handler.clean_up
unpause_sidekiq @system.unpause_sidekiq
disable_readonly_mode if Discourse.readonly_mode? @system.disable_readonly_mode if Discourse.readonly_mode?
mark_restore_as_not_running @system.mark_restore_as_not_running
end
# Deletes the temporary directory (@tmp_directory) if it exists.
# Errors are logged, not raised.
def remove_tmp_directory
  begin
    log "Removing tmp '#{@tmp_directory}' directory..."
    directory_found = Dir[@tmp_directory].present?
    FileUtils.rm_rf(@tmp_directory) if directory_found
  rescue => error
    log "Something went wrong while removing the following tmp directory: #{@tmp_directory}", error
  end
end
def unpause_sidekiq
log "Unpausing sidekiq..."
Sidekiq.unpause!
rescue => ex
log "Something went wrong while unpausing Sidekiq.", ex
end end
def clear_theme_cache def clear_theme_cache
@ -630,54 +157,9 @@ module BackupRestore
Stylesheet::Manager.cache.clear Stylesheet::Manager.cache.clear
end end
# Drops every readonly function recorded in @created_functions_for_table_columns.
# Errors are logged, not raised.
def drop_created_discourse_functions
  begin
    log "Dropping function from the discourse_functions schema"

    @created_functions_for_table_columns.each do |table, column|
      Migration::BaseDropper.drop_readonly_function(table, column)
    end
  rescue => error
    log "Something went wrong while dropping functions from the discourse_functions schema", error
  end
end
# Turns readonly mode off again, unless @readonly_mode_was_enabled is set.
# Errors are logged, not raised.
def disable_readonly_mode
  begin
    unless @readonly_mode_was_enabled
      log "Disabling readonly mode..."
      Discourse.disable_readonly_mode
    end
  rescue => error
    log "Something went wrong while disabling readonly mode.", error
  end
end
# Clears the "restore is running" marker. Errors are logged, not raised.
def mark_restore_as_not_running
  begin
    log "Marking restore as finished..."
    BackupRestore.mark_as_not_running!
  rescue => error
    log "Something went wrong while marking restore as finished.", error
  end
end
def after_restore_hook def after_restore_hook
log "Executing the after_restore_hook..." log "Executing the after_restore_hook..."
DiscourseEvent.trigger(:restore_complete) DiscourseEvent.trigger(:restore_complete)
end end
# Prints the message, publishes it and stores it together with a timestamp.
# When an exception is given, it is additionally written with its backtrace
# to the Rails error log. Does nothing in the test environment.
def log(message, ex = nil)
  return if Rails.env.test?

  now = Time.now.strftime("%Y-%m-%d %H:%M:%S")
  puts(message)
  publish_log(message, now)
  save_log(message, now)

  if ex
    backtrace = ex.backtrace.join("\n")
    Rails.logger.error("#{ex}\n#{backtrace}")
  end
end
# Publishes the log line on the backup/restore logs channel for the user and
# client that started the restore. No-op unless @publish_to_message_bus is set.
def publish_log(message, timestamp)
  if @publish_to_message_bus
    payload = { timestamp: timestamp, operation: "restore", message: message }
    MessageBus.publish(BackupRestore::LOGS_CHANNEL, payload, user_ids: [@user_id], client_ids: [@client_id])
  end
end
# Appends the message, prefixed with its timestamp in brackets, to @logs.
def save_log(message, timestamp)
  entry = "[#{timestamp}] #{message}"
  @logs << entry
end
end end
end end

View File

@ -0,0 +1,102 @@
# frozen_string_literal: true
module BackupRestore
# Raised when Sidekiq is still running jobs after the allowed waiting time.
class RunningSidekiqJobsError < RuntimeError
  def initialize
    message = "Sidekiq did not finish running all the jobs in the allowed time!"
    super(message)
  end
end
# Groups the system-level operations used around a restore: readonly mode, the
# "restore is running" marker, the shutdown-signal watcher and Sidekiq.
class SystemInterface
  delegate :log, to: :@logger, private: true

  # logger - object that receives the `log` calls of this class
  def initialize(logger)
    @logger = logger
    @current_db = RailsMultisite::ConnectionManagement.current_db
    # Remembered so that a readonly mode that was already active is left untouched.
    @readonly_mode_was_enabled = Discourse.readonly_mode?
  end

  # Enables readonly mode unless it was already enabled when this object was created.
  def enable_readonly_mode
    unless @readonly_mode_was_enabled
      log "Enabling readonly mode..."
      Discourse.enable_readonly_mode
    end
  end

  # Disables readonly mode unless it was already enabled when this object was
  # created. Errors are logged, not raised.
  def disable_readonly_mode
    begin
      unless @readonly_mode_was_enabled
        log "Disabling readonly mode..."
        Discourse.disable_readonly_mode
      end
    rescue => error
      log "Something went wrong while disabling readonly mode.", error
    end
  end

  def mark_restore_as_running
    log "Marking restore as running..."
    BackupRestore.mark_as_running!
  end

  # Errors are logged, not raised.
  def mark_restore_as_not_running
    begin
      log "Marking restore as finished..."
      BackupRestore.mark_as_not_running!
    rescue => error
      log "Something went wrong while marking restore as finished.", error
    end
  end

  # Starts a thread that polls every 0.1 seconds while an operation is running
  # and calls `exit` as soon as a shutdown was requested.
  def listen_for_shutdown_signal
    Thread.new do
      loop do
        break unless BackupRestore.is_operation_running?
        exit if BackupRestore.should_shutdown?
        sleep 0.1
      end
    end
  end

  def pause_sidekiq
    log "Pausing sidekiq..."
    Sidekiq.pause!
  end

  # Errors are logged, not raised.
  def unpause_sidekiq
    begin
      log "Unpausing sidekiq..."
      Sidekiq.unpause!
    rescue => error
      log "Something went wrong while unpausing Sidekiq.", error
    end
  end

  # Blocks until Sidekiq has no more running jobs for this site.
  # Raises RunningSidekiqJobsError when jobs are still running after the
  # maximum waiting time.
  def wait_for_sidekiq
    # Wait at least 6 seconds because the data about workers is updated every 5 seconds
    # https://github.com/mperham/sidekiq/wiki/API#workers
    max_wait_seconds = 60
    wait_seconds = 6.0

    log "Waiting up to #{max_wait_seconds} seconds for Sidekiq to finish running jobs..."

    max_iterations = (max_wait_seconds / wait_seconds).ceil

    1.upto(max_iterations) do |attempt|
      sleep wait_seconds
      return unless sidekiq_has_running_jobs?

      raise RunningSidekiqJobsError.new if attempt >= max_iterations
      log "Waiting for sidekiq to finish running jobs... ##{attempt + 1}"
    end
  end

  protected

  # True when a job is running whose first argument has no "current_site_id"
  # or one that equals the current database.
  def sidekiq_has_running_jobs?
    Sidekiq::Workers.new.each do |_, _, work|
      first_arg = work&.dig("payload", "args")&.first
      site_id = first_arg.present? ? first_arg["current_site_id"] : nil

      return true if site_id.blank?
      return true if site_id == @current_db
    end

    false
  end
end
end

View File

@ -0,0 +1,136 @@
# frozen_string_literal: true
module BackupRestore
UploadsRestoreError = Class.new(RuntimeError)
# Restores the uploads of a backup: remaps upload URLs in the database, copies the
# files into the current file store and triggers the regeneration of optimized
# images and the rebake of posts.
class UploadsRestorer
  delegate :log, to: :@logger, private: true

  # logger - object that receives the `log` calls of this class
  def initialize(logger)
    @logger = logger
  end

  # Restores the uploads found below "<tmp_directory>/uploads".
  #
  # Returns early when there is no upload directory.
  # Raises UploadsRestoreError when more than one directory is found or when the
  # current file store does not support restoring.
  def restore(tmp_directory)
    upload_directories = Dir.glob(File.join(tmp_directory, "uploads", "*"))
      .reject { |path| File.basename(path).start_with?("PaxHeaders") }

    if upload_directories.count > 1
      raise UploadsRestoreError.new("Could not find uploads, because the uploads " \
        "directory contains multiple folders.")
    end

    @tmp_uploads_path = upload_directories.first
    return if @tmp_uploads_path.blank?

    @previous_db_name = BackupMetadata.value_for("db_name") || File.basename(@tmp_uploads_path)
    @current_db_name = RailsMultisite::ConnectionManagement.current_db
    backup_contains_optimized_images = File.exist?(File.join(@tmp_uploads_path, "optimized"))

    remap_uploads
    restore_uploads

    generate_optimized_images unless backup_contains_optimized_images
    rebake_posts_with_uploads
  end

  protected

  # Hands the extracted uploads over to the current file store.
  def restore_uploads
    store = Discourse.store

    if !store.respond_to?(:copy_from)
      # a FileStore implementation from a plugin might not support this method, so raise a helpful error
      store_name = Discourse.store.class.name
      raise UploadsRestoreError.new("The current file store (#{store_name}) does not support restoring uploads.")
    end

    log "Restoring uploads, this may take a while..."
    store.copy_from(@tmp_uploads_path)
  end

  # Remaps upload URLs depending on old and new configuration.
  # URLs of uploads differ a little bit between local uploads and uploads stored on S3.
  # Multisites are another reason why URLs can be different.
  #
  # Examples:
  #   * regular site, local storage
  #     /uploads/default/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg
  #
  #   * regular site, S3
  #     //bucket-name.s3.dualstack.us-west-2.amazonaws.com/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg
  #
  #   * multisite, local storage
  #     /uploads/<site-name>/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg
  #
  #   * multisite, S3
  #     //bucket-name.s3.dualstack.us-west-2.amazonaws.com/uploads/<site-name>/original/1X/63b76551662ccea1a594e161c37dd35188d77657.jpeg
  #
  # Any error is logged and swallowed.
  def remap_uploads
    log "Remapping uploads..."

    was_multisite = BackupMetadata.value_for("multisite") == "t"
    upload_path = "/#{Discourse.store.upload_path}/"
    uploads_folder = was_multisite ? "/" : upload_path

    if (old_base_url = BackupMetadata.value_for("base_url")) && old_base_url != Discourse.base_url
      remap(old_base_url, Discourse.base_url)
    end

    current_s3_base_url = SiteSetting::Upload.enable_s3_uploads ? SiteSetting::Upload.s3_base_url : nil
    if (old_s3_base_url = BackupMetadata.value_for("s3_base_url")) && old_s3_base_url != current_s3_base_url
      remap("#{old_s3_base_url}/", uploads_folder)
    end

    current_s3_cdn_url = SiteSetting::Upload.enable_s3_uploads ? SiteSetting::Upload.s3_cdn_url : nil
    if (old_s3_cdn_url = BackupMetadata.value_for("s3_cdn_url")) && old_s3_cdn_url != current_s3_cdn_url
      # An empty string is truthy in Ruby, so a plain `||` would keep a blank CDN
      # URL and produce an empty base URL and a nil host. `presence` falls back to
      # the site's base URL in that case.
      base_url = current_s3_cdn_url.presence || Discourse.base_url
      remap("#{old_s3_cdn_url}/", UrlHelper.schemaless("#{base_url}#{uploads_folder}"))

      old_host = URI.parse(old_s3_cdn_url).host
      new_host = URI.parse(base_url).host
      remap(old_host, new_host) if old_host != new_host
    end

    if (old_cdn_url = BackupMetadata.value_for("cdn_url")) && old_cdn_url != Discourse.asset_host
      base_url = Discourse.asset_host || Discourse.base_url
      remap("#{old_cdn_url}/", UrlHelper.schemaless("#{base_url}/"))

      old_host = URI.parse(old_cdn_url).host
      new_host = URI.parse(base_url).host
      remap(old_host, new_host) if old_host != new_host
    end

    if @previous_db_name != @current_db_name
      remap("/uploads/#{@previous_db_name}/", upload_path)
    end

  rescue => ex
    log "Something went wrong while remapping uploads.", ex
  end

  # Logs the replacement and remaps `from` to `to` in the database, leaving the
  # "backup_metadata" table untouched.
  def remap(from, to)
    log "Remapping '#{from}' to '#{to}'"
    DbHelper.remap(from, to, verbose: true, excluded_tables: ["backup_metadata"])
  end

  # Empties the optimized_images table, re-creates the optimized site icons and
  # enqueues a :create_avatar_thumbnails job for each user with an uploaded avatar.
  def generate_optimized_images
    log "Optimizing site icons..."
    DB.exec("TRUNCATE TABLE optimized_images")
    SiteIconManager.ensure_optimized!

    User.where("uploaded_avatar_id IS NOT NULL").find_each do |user|
      Jobs.enqueue(:create_avatar_thumbnails, upload_id: user.uploaded_avatar_id)
    end
  end

  # Resets baked_version of every post that has uploads.
  def rebake_posts_with_uploads
    log 'Posts will be rebaked by a background job in sidekiq. You will see missing images until that has completed.'
    log 'You can expedite the process by manually running "rake posts:rebake_uncooked_posts"'

    DB.exec(<<~SQL)
      UPDATE posts
      SET baked_version = NULL
      WHERE id IN (SELECT post_id FROM post_uploads)
    SQL
  end
end
end

View File

@ -100,6 +100,16 @@ module FileStore
list_missing(OptimizedImage) unless skip_optimized list_missing(OptimizedImage) unless skip_optimized
end end
# Copies the content of source_path into this store's upload directory with rsync.
# The upload directory is created first; rsync runs from within public_dir because
# its destination is given relative to it.
def copy_from(source_path)
  target_directory = File.join(public_dir, upload_path)
  FileUtils.mkdir_p(target_directory)

  rsync_command = ['rsync', '-a', '--safe-links', "#{source_path}/", "#{upload_path}/"]

  Discourse::Utils.execute_command(
    *rsync_command,
    failure_message: "Failed to copy uploads.",
    chdir: public_dir
  )
end
private private
def list_missing(model) def list_missing(model)

View File

@ -174,6 +174,32 @@ module FileStore
@s3_helper.download_file(get_upload_key(upload), destination_path) @s3_helper.download_file(get_upload_key(upload), destination_path)
end end
# Uploads the files below source_path to S3.
#
# The S3 migration reads the files from the local store's public upload path, so
# source_path is temporarily symlinked to that location. An existing upload
# directory is moved aside first and moved back afterwards.
#
# When source_path already is the public upload path nothing is moved or linked.
# Previously a symlink was requested in that case as well; because the target is
# an existing directory, FileUtils.symlink created a stray self-referencing link
# *inside* the upload directory which was never removed.
def copy_from(source_path)
  local_store = FileStore::LocalStore.new
  public_upload_path = File.join(local_store.public_dir, local_store.upload_path)
  old_upload_path = nil
  symlink_created = false

  # The migration to S3 and lots of other code expects files to exist in public/uploads,
  # so lets move them there before executing the migration.
  if public_upload_path != source_path
    if Dir.exist?(public_upload_path)
      backup_path = "#{public_upload_path}_#{SecureRandom.hex}"
      FileUtils.mv(public_upload_path, backup_path)
      # Only remembered after a successful move, so that the cleanup below never
      # tries to move back a directory that does not exist.
      old_upload_path = backup_path
    end

    FileUtils.mkdir_p(File.expand_path("..", public_upload_path))
    FileUtils.symlink(source_path, public_upload_path)
    symlink_created = true
  end

  FileStore::ToS3Migration.new(
    s3_options: FileStore::ToS3Migration.s3_options_from_env,
    migrate_to_multisite: Rails.configuration.multisite,
  ).migrate

ensure
  # Only remove the link created above, then restore the original directory.
  FileUtils.rm(public_upload_path) if symlink_created && File.symlink?(public_upload_path)
  FileUtils.mv(old_upload_path, public_upload_path) if old_upload_path
end
private private
def presigned_url(url, force_download: false, filename: false) def presigned_url(url, force_download: false, filename: false)

View File

@ -0,0 +1,346 @@
# frozen_string_literal: true
module FileStore
ToS3MigrationError = Class.new(RuntimeError)
class ToS3Migration
# s3_options           - Hash with :bucket and :client_options
# dry_run              - stored as @dry_run
# migrate_to_multisite - stored as @migrate_to_multisite
# skip_etag_verify     - stored as @skip_etag_verify
#
# The current database name is captured at construction time.
def initialize(s3_options:, dry_run: false, migrate_to_multisite: false, skip_etag_verify: false)
  @current_db = RailsMultisite::ConnectionManagement.current_db

  @s3_bucket = s3_options[:bucket]
  @s3_client_options = s3_options[:client_options]

  @dry_run = dry_run
  @migrate_to_multisite = migrate_to_multisite
  @skip_etag_verify = skip_etag_verify
end
# Builds the options hash for the constructor from the site settings.
def self.s3_options_from_site_settings
  client_options = S3Helper.s3_options(SiteSetting)
  bucket = SiteSetting.s3_upload_bucket

  { client_options: client_options, bucket: bucket }
end
# Builds the options hash for the constructor from DISCOURSE_S3_* environment
# variables.
#
# Raises ToS3MigrationError unless bucket and region are set together with either
# both access keys or DISCOURSE_S3_USE_IAM_PROFILE.
def self.s3_options_from_env
  bucket = ENV["DISCOURSE_S3_BUCKET"]
  region = ENV["DISCOURSE_S3_REGION"]
  use_iam_profile = ENV["DISCOURSE_S3_USE_IAM_PROFILE"]

  access_keys_given =
    ENV["DISCOURSE_S3_ACCESS_KEY_ID"].present? &&
    ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"].present?

  if bucket.blank? || region.blank? || !(access_keys_given || use_iam_profile.present?)
    raise ToS3MigrationError.new(<<~TEXT)
      Please provide the following environment variables:
      - DISCOURSE_S3_BUCKET
      - DISCOURSE_S3_REGION
      and either
      - DISCOURSE_S3_ACCESS_KEY_ID
      - DISCOURSE_S3_SECRET_ACCESS_KEY
      or
      - DISCOURSE_S3_USE_IAM_PROFILE
    TEXT
  end

  client_options = { region: region }

  endpoint = ENV["DISCOURSE_S3_ENDPOINT"]
  client_options[:endpoint] = endpoint if endpoint.present?

  # Access keys are only passed on when no IAM profile is used.
  if use_iam_profile.blank?
    client_options[:access_key_id] = ENV["DISCOURSE_S3_ACCESS_KEY_ID"]
    client_options[:secret_access_key] = ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"]
  end

  { client_options: client_options, bucket: bucket }
end
# Public entry point; delegates to #migrate_to_s3.
def migrate
  migrate_to_s3
end
# Checks whether the uploads of the current site are fully migrated to S3.
#
# Four checks run in order: user uploads not pointing to the S3 base URL, cooked
# posts still containing the local (CDN) upload path, issues reported by
# "rake posts:missing_uploads" and posts that still need rebaking.
#
# should_raise - when true, the first of the first three failing checks raises
#                ToS3MigrationError instead of being logged
#
# Returns true only when all four checks pass.
def migration_successful?(should_raise: false)
  failure_message = "S3 migration failed for db '#{@current_db}'."
  all_checks_passed = true

  s3_prefix = @migrate_to_multisite ? "uploads/#{@current_db}/original/" : "original/"
  s3_base_url = File.join(SiteSetting.Upload.s3_base_url, s3_prefix)

  unmigrated_uploads = Upload.by_users.where("url NOT LIKE '#{s3_base_url}%'").count
  if unmigrated_uploads > 0
    raise_or_log(
      "#{unmigrated_uploads} of #{Upload.count} uploads are not migrated to S3. #{failure_message}",
      should_raise
    )
    all_checks_passed = false
  end

  local_cdn_path = SiteSetting.cdn_path("/uploads/#{@current_db}/original").sub(/https?:/, "")
  posts_with_local_urls = Post.where("cooked LIKE '%#{local_cdn_path}%'").count
  if posts_with_local_urls > 0
    raise_or_log(
      "#{posts_with_local_urls} posts are not remapped to new S3 upload URL. #{failure_message}",
      should_raise
    )
    all_checks_passed = false
  end

  # NOTE(review): Rake executes a task only once per process unless it is
  # re-enabled, so a second call of this method presumably reuses the result of
  # the first run — confirm whether that is intended.
  Discourse::Application.load_tasks
  Rake::Task['posts:missing_uploads'].invoke('single_site')
  missing_upload_issues = PostCustomField.where(name: Post::MISSING_UPLOADS).count
  if missing_upload_issues > 0
    raise_or_log(
      "rake posts:missing_uploads identified #{missing_upload_issues} issues. #{failure_message}",
      should_raise
    )
    all_checks_passed = false
  end

  # Posts waiting for a rebake never raise; they only make the result false.
  posts_to_rebake = Post.where('baked_version <> ? OR baked_version IS NULL', Post::BAKED_VERSION).count
  if posts_to_rebake > 0
    log("#{posts_to_rebake} posts still require rebaking and will be rebaked during regular job")
    if posts_to_rebake > 100
      log("To speed up migrations of posts we recommend you run 'rake posts:rebake_uncooked_posts'")
    end
    all_checks_passed = false
  else
    log("No posts require rebaking")
  end

  all_checks_passed
end
protected
# Writes the message to stdout.
def log(message)
  puts(message)
end
# Raises ToS3MigrationError with the message when should_raise is truthy,
# otherwise just logs the message.
def raise_or_log(message, should_raise)
  raise ToS3MigrationError.new(message) if should_raise

  log(message)
end
# True when no user upload is left whose URL neither starts with "//" nor with
# the site's "/uploads/<db>/original/_X/" path ("_" matches any single character).
def uploads_migrated_to_new_scheme?
  seeded_image_url = "#{GlobalSetting.relative_url_root}/uploads/#{@current_db}/original/_X/"
  old_scheme_condition = "url NOT LIKE '//%' AND url NOT LIKE '#{seeded_image_url}%'"

  old_scheme_uploads = Upload.by_users.where(old_scheme_condition)
  !old_scheme_uploads.exists?
end
# Uploads the local files of the current site to S3 and rewrites the upload URLs
# stored in the database so that they point to S3.
#
# Steps: make sure uploads use the new storage scheme, list local and remote
# files, upload what is missing or different, remap URLs in the database, delete
# outdated optimized images and flag posts with lightboxes for a rebake.
#
# With @dry_run, uploads and URL replacements are only logged. The migration to
# the new upload scheme and the final verification still run.
#
# Raises ToS3MigrationError when uploads can't be moved to the new scheme, when
# files fail to upload or when the final verification fails. Jobs are always
# switched back to being run later.
def migrate_to_s3
  # We don't want a partially migrated state, so make sure all jobs run immediately.
  Jobs.run_immediately!

  log "Checking if #{@current_db} already migrated..."
  return log "Already migrated #{@current_db}!" if migration_successful?

  log "*" * 30 + " DRY RUN " + "*" * 30 if @dry_run
  log "Migrating uploads to S3 for '#{@current_db}'..."

  if !uploads_migrated_to_new_scheme?
    log "Some uploads were not migrated to the new scheme. Running the migration, this may take a while..."
    SiteSetting.migrate_to_new_scheme = true
    Upload.migrate_to_new_scheme

    if !uploads_migrated_to_new_scheme?
      raise ToS3MigrationError.new("Some uploads could not be migrated to the new scheme. " \
        "You need to fix this manually.")
    end
  end

  bucket_has_folder_path = @s3_bucket.include?("/")
  public_directory = Rails.root.join("public").to_s

  s3 = Aws::S3::Client.new(@s3_client_options)

  if bucket_has_folder_path
    bucket, folder = S3Helper.get_bucket_and_folder_path(@s3_bucket)
    folder = File.join(folder, "/")
  else
    bucket, folder = @s3_bucket, ""
  end

  log "Uploading files to S3..."
  log " - Listing local files"

  local_files = []
  # Array form + chdir avoids building a shell command from paths, and the block
  # form closes the pipe once all lines are read.
  find_command = ["find", "uploads/#{@current_db}/original", "-type", "f"]
  IO.popen(find_command, chdir: public_directory) do |io|
    io.each_line do |file|
      local_files << file.chomp
      putc "." if local_files.size % 1000 == 0
    end
  end

  log " => #{local_files.size} files"
  log " - Listing S3 files"

  s3_objects = []
  prefix = @migrate_to_multisite ? "uploads/#{@current_db}/original/" : "original/"

  list_options = { bucket: bucket, prefix: folder + prefix }

  loop do
    response = s3.list_objects_v2(list_options)
    s3_objects.concat(response.contents)
    putc "."
    break if response.next_continuation_token.blank?
    list_options[:continuation_token] = response.next_continuation_token
  end

  log " => #{s3_objects.size} files"
  log " - Syncing files to S3"

  synced = 0
  failed = []

  local_files.each do |file|
    path = File.join(public_directory, file)
    name = File.basename(path)
    etag = Digest::MD5.file(path).hexdigest unless @skip_etag_verify
    key = file[file.index(prefix)..-1]
    key.prepend(folder) if bucket_has_folder_path
    original_path = file.sub("uploads/#{@current_db}", "")

    # Skip files that already exist on S3 with the same size (and etag, unless
    # etag verification is disabled).
    if s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) }
      next if File.size(path) == s3_object.size && (@skip_etag_verify || s3_object.etag[etag])
    end

    upload_options = {
      acl: "public-read",
      bucket: bucket,
      content_type: MiniMime.lookup_by_filename(name)&.content_type,
      key: key,
    }

    if !FileHelper.is_supported_image?(name)
      upload = Upload.find_by(url: "/#{file}")

      if upload&.original_filename
        upload_options[:content_disposition] =
          %Q{attachment; filename="#{upload.original_filename}"}
      end

      if upload&.secure
        upload_options[:acl] = "private"
      end
    end

    etag ||= Digest::MD5.file(path).hexdigest

    if @dry_run
      log "#{file} => #{upload_options[:key]}"
      synced += 1
    else
      # The block form closes the file handle after the upload; previously one
      # handle per uploaded file stayed open.
      uploaded = File.open(path, "rb") do |body|
        s3.put_object(upload_options.merge(body: body)).etag[etag]
      end

      if uploaded
        putc "."
        synced += 1
      else
        putc "X"
        failed << path
      end
    end
  end

  puts

  failure_message = "S3 migration failed for db '#{@current_db}'."

  if failed.size > 0
    log "Failed to upload #{failed.size} files"
    log failed.join("\n")
    # Same error class as the other failures of this class (still a RuntimeError).
    raise ToS3MigrationError.new(failure_message)
  elsif s3_objects.size + synced >= local_files.size
    log "Updating the URLs in the database..."

    from = "/uploads/#{@current_db}/original/"
    to = "#{SiteSetting.Upload.s3_base_url}/#{prefix}"

    if @dry_run
      log "REPLACING '#{from}' WITH '#{to}'"
    else
      DbHelper.remap(from, to, anchor_left: true)
    end

    [
      [
        "src=\"/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "src=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "src='/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "src='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "href=\"/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "href=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "href='/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "href='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "\\[img\\]/uploads/#{@current_db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)\\[/img\\]",
        "[img]#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1[/img]"
      ]
    ].each do |from_url, to_url|
      if @dry_run
        log "REPLACING '#{from_url}' WITH '#{to_url}'"
      else
        DbHelper.regexp_replace(from_url, to_url)
      end
    end

    unless @dry_run
      # Legacy inline image format: ![](/uploads/<db>/original/...)
      # The query uses the current db (it used to be hard-coded to "default") and
      # the pattern contains the literal parentheses of the markdown link; without
      # them it could never match.
      legacy_like = "%![](/uploads/#{@current_db}/original/%)%"
      legacy_regexp = %r{!\[\]\((/uploads/#{Regexp.escape(@current_db)}/original/(\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\.]*))\)}

      Post.where("raw LIKE ?", legacy_like).each do |post|
        post.raw.scan(legacy_regexp).each do |upload_url, _|
          upload = Upload.get_from_url(upload_url)
          # Leave the link untouched when no upload record exists for the URL.
          next if upload.nil?
          post.raw = post.raw.gsub("![](#{upload_url})", "![](#{upload.short_url})")
        end

        post.save!(validate: false)
      end
    end

    if Discourse.asset_host.present?
      # Uploads that were on local CDN will now be on S3 CDN
      from = "#{Discourse.asset_host}/uploads/#{@current_db}/original/"
      to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"

      if @dry_run
        log "REMAPPING '#{from}' TO '#{to}'"
      else
        DbHelper.remap(from, to)
      end
    end

    # Uploads that were on base hostname will now be on S3 CDN
    from = "#{Discourse.base_url}/uploads/#{@current_db}/original/"
    to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"

    if @dry_run
      log "REMAPPING '#{from}' TO '#{to}'"
    else
      DbHelper.remap(from, to)
    end

    unless @dry_run
      log "Removing old optimized images..."

      OptimizedImage
        .joins("LEFT JOIN uploads u ON optimized_images.upload_id = u.id")
        .where("u.id IS NOT NULL AND u.url LIKE '//%' AND optimized_images.url NOT LIKE '//%'")
        .delete_all

      log "Flagging all posts containing lightboxes for rebake..."

      count = Post.where("cooked LIKE '%class=\"lightbox\"%'").update_all(baked_version: nil)
      log "#{count} posts were flagged for a rebake"
    end
  end

  migration_successful?(should_raise: true)

  log "Done!"

ensure
  Jobs.run_later!
end
end
end

View File

@ -67,6 +67,10 @@ class Migration::SafeMigrate
end end
end end
# Path of the post-deployment migrations, as defined by Discourse::DB_POST_MIGRATE_PATH.
def self.post_migration_path
  Discourse::DB_POST_MIGRATE_PATH
end
def self.enable! def self.enable!
return if PG::Connection.method_defined?(:exec_migrator_unpatched) return if PG::Connection.method_defined?(:exec_migrator_unpatched)

View File

@ -224,59 +224,19 @@ def migrate_to_s3_all_sites
end end
end end
def migration_successful?(db, should_raise = false) def migrate_to_s3
success = true FileStore::ToS3Migration.new(
s3_options: FileStore::ToS3Migration.s3_options_from_env,
failure_message = "S3 migration failed for db '#{db}'." dry_run: !!ENV["DRY_RUN"],
prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/" migrate_to_multisite: !!ENV["MIGRATE_TO_MULTISITE"],
skip_etag_verify: !!ENV["SKIP_ETAG_VERIFY"]
base_url = File.join(SiteSetting.Upload.s3_base_url, prefix) ).migrate
count = Upload.by_users.where("url NOT LIKE '#{base_url}%'").count
error_message = "#{count} of #{Upload.count} uploads are not migrated to S3. #{failure_message}"
raise error_message if count > 0 && should_raise
success &&= count == 0
puts error_message if count > 0
cdn_path = SiteSetting.cdn_path("/uploads/#{db}/original").sub(/https?:/, "")
count = Post.where("cooked LIKE '%#{cdn_path}%'").count
error_message = "#{count} posts are not remapped to new S3 upload URL. #{failure_message}"
raise error_message if count > 0 && should_raise
success &&= count == 0
puts error_message if count > 0
Rake::Task['posts:missing_uploads'].invoke('single_site')
count = PostCustomField.where(name: Post::MISSING_UPLOADS).count
error_message = "rake posts:missing_uploads identified #{count} issues. #{failure_message}"
raise error_message if count > 0 && should_raise
success &&= count == 0
puts error_message if count > 0
count = Post.where('baked_version <> ? OR baked_version IS NULL', Post::BAKED_VERSION).count
if count > 0
puts "#{count} posts still require rebaking and will be rebaked during regular job"
if count > 100
puts "To speed up migrations of posts we recommend you run 'rake posts:rebake_uncooked_posts'"
end
success = false
else
puts "No posts require rebaking"
end
success
end end
task "uploads:s3_migration_status" => :environment do task "uploads:s3_migration_status" => :environment do
success = true success = true
RailsMultisite::ConnectionManagement.each_connection do RailsMultisite::ConnectionManagement.each_connection do
db = RailsMultisite::ConnectionManagement.current_db success &&= FileStore::ToS3Migration.new.migration_successful?
success &&= migration_successful?(db)
end end
queued_jobs = Sidekiq::Stats.new.queues.sum { |_ , x| x } queued_jobs = Sidekiq::Stats.new.queues.sum { |_ , x| x }
@ -293,266 +253,6 @@ task "uploads:s3_migration_status" => :environment do
puts "All sites appear to have uploads in order!" puts "All sites appear to have uploads in order!"
end end
def migrate_to_s3
# we don't want have migrated state, ensure we run all jobs here
Jobs.run_immediately!
db = RailsMultisite::ConnectionManagement.current_db
dry_run = !!ENV["DRY_RUN"]
puts "Checking if #{db} already migrated..."
return puts "Already migrated #{db}!" if migration_successful?(db)
puts "*" * 30 + " DRY RUN " + "*" * 30 if dry_run
puts "Migrating uploads to S3 for '#{db}'..."
if Upload.by_users.where("url NOT LIKE '//%' AND url NOT LIKE '#{GlobalSetting.relative_url_root}/uploads/#{db}/original/_X/%'").exists?
puts <<~TEXT
Some uploads were not migrated to the new scheme. Please run these commands in the rails console
SiteSetting.migrate_to_new_scheme = true
Jobs::MigrateUploadScheme.new.execute(nil)
TEXT
exit 1
end
unless ENV["DISCOURSE_S3_BUCKET"].present? &&
ENV["DISCOURSE_S3_REGION"].present? &&
(
(
ENV["DISCOURSE_S3_ACCESS_KEY_ID"].present? &&
ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"].present?
) ||
ENV["DISCOURSE_S3_USE_IAM_PROFILE"].present?
)
puts <<~TEXT
Please provide the following environment variables
- DISCOURSE_S3_BUCKET
- DISCOURSE_S3_REGION
and either
- DISCOURSE_S3_ACCESS_KEY_ID
- DISCOURSE_S3_SECRET_ACCESS_KEY
or
- DISCOURSE_S3_USE_IAM_PROFILE
TEXT
exit 2
end
if SiteSetting.Upload.s3_cdn_url.blank?
puts "Please provide the 'DISCOURSE_S3_CDN_URL' environment variable"
exit 3
end
bucket_has_folder_path = true if ENV["DISCOURSE_S3_BUCKET"].include? "/"
public_directory = Rails.root.join("public").to_s
opts = {
region: ENV["DISCOURSE_S3_REGION"],
access_key_id: ENV["DISCOURSE_S3_ACCESS_KEY_ID"],
secret_access_key: ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"]
}
# S3::Client ignores the `region` option when an `endpoint` is provided.
# Without `region`, non-default region bucket creation will break for S3, so we can only
# define endpoint when not using S3 i.e. when SiteSetting.s3_endpoint is provided.
opts[:endpoint] = SiteSetting.s3_endpoint if SiteSetting.s3_endpoint.present?
s3 = Aws::S3::Client.new(opts)
if bucket_has_folder_path
bucket, folder = S3Helper.get_bucket_and_folder_path(ENV["DISCOURSE_S3_BUCKET"])
folder = File.join(folder, "/")
else
bucket, folder = ENV["DISCOURSE_S3_BUCKET"], ""
end
puts "Uploading files to S3..."
print " - Listing local files"
local_files = []
IO.popen("cd #{public_directory} && find uploads/#{db}/original -type f").each do |file|
local_files << file.chomp
putc "." if local_files.size % 1000 == 0
end
puts " => #{local_files.size} files"
print " - Listing S3 files"
s3_objects = []
prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/"
options = { bucket: bucket, prefix: folder + prefix }
loop do
response = s3.list_objects_v2(options)
s3_objects.concat(response.contents)
putc "."
break if response.next_continuation_token.blank?
options[:continuation_token] = response.next_continuation_token
end
puts " => #{s3_objects.size} files"
puts " - Syncing files to S3"
synced = 0
failed = []
skip_etag_verify = ENV["SKIP_ETAG_VERIFY"].present?
local_files.each do |file|
path = File.join(public_directory, file)
name = File.basename(path)
etag = Digest::MD5.file(path).hexdigest unless skip_etag_verify
key = file[file.index(prefix)..-1]
key.prepend(folder) if bucket_has_folder_path
original_path = file.sub("uploads/#{db}", "")
if s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) }
next if File.size(path) == s3_object.size && (skip_etag_verify || s3_object.etag[etag])
end
options = {
acl: "public-read",
body: File.open(path, "rb"),
bucket: bucket,
content_type: MiniMime.lookup_by_filename(name)&.content_type,
key: key,
}
if !FileHelper.is_supported_image?(name)
upload = Upload.find_by(url: "/#{file}")
if upload&.original_filename
options[:content_disposition] =
%Q{attachment; filename="#{upload.original_filename}"}
end
if upload&.secure
options[:acl] = "private"
end
end
etag ||= Digest::MD5.file(path).hexdigest
if dry_run
puts "#{file} => #{options[:key]}"
synced += 1
elsif s3.put_object(options).etag[etag]
putc "."
synced += 1
else
putc "X"
failed << path
end
end
puts
failure_message = "S3 migration failed for db '#{db}'."
if failed.size > 0
puts "Failed to upload #{failed.size} files"
puts failed.join("\n")
raise failure_message
elsif s3_objects.size + synced >= local_files.size
puts "Updating the URLs in the database..."
from = "/uploads/#{db}/original/"
to = "#{SiteSetting.Upload.s3_base_url}/#{prefix}"
if dry_run
puts "REPLACING '#{from}' WITH '#{to}'"
else
DbHelper.remap(from, to, anchor_left: true)
end
[
[
"src=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
"src=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
],
[
"src='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
"src='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
],
[
"href=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
"href=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
],
[
"href='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
"href='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
],
[
"\\[img\\]/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)\\[/img\\]",
"[img]#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1[/img]"
]
].each do |from_url, to_url|
if dry_run
puts "REPLACING '#{from_url}' WITH '#{to_url}'"
else
DbHelper.regexp_replace(from_url, to_url)
end
end
unless dry_run
# Legacy inline image format
Post.where("raw LIKE '%![](/uploads/default/original/%)%'").each do |post|
regexp = /!\[\](\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/
post.raw.scan(regexp).each do |upload_url, _|
upload = Upload.get_from_url(upload_url)
post.raw = post.raw.gsub("![](#{upload_url})", "![](#{upload.short_url})")
end
post.save!(validate: false)
end
end
if Discourse.asset_host.present?
# Uploads that were on local CDN will now be on S3 CDN
from = "#{Discourse.asset_host}/uploads/#{db}/original/"
to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"
if dry_run
puts "REMAPPING '#{from}' TO '#{to}'"
else
DbHelper.remap(from, to)
end
end
# Uploads that were on base hostname will now be on S3 CDN
from = "#{Discourse.base_url}/uploads/#{db}/original/"
to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"
if dry_run
puts "REMAPPING '#{from}' TO '#{to}'"
else
DbHelper.remap(from, to)
end
unless dry_run
puts "Removing old optimized images..."
OptimizedImage
.joins("LEFT JOIN uploads u ON optimized_images.upload_id = u.id")
.where("u.id IS NOT NULL AND u.url LIKE '//%' AND optimized_images.url NOT LIKE '//%'")
.delete_all
puts "Flagging all posts containing lightboxes for rebake..."
count = Post.where("cooked LIKE '%class=\"lightbox\"%'").update_all(baked_version: nil)
puts "#{count} posts were flagged for a rebake"
end
end
migration_successful?(db, true)
puts "Done!"
end
################################################################################ ################################################################################
# clean_up # # clean_up #
################################################################################ ################################################################################

View File

@ -135,9 +135,10 @@ class DiscourseCLI < Thor
begin begin
puts "Starting restore: #{filename}" puts "Starting restore: #{filename}"
restorer = BackupRestore::Restorer.new( restorer = BackupRestore::Restorer.new(
Discourse.system_user.id, user_id: Discourse.system_user.id,
filename: filename, filename: filename,
disable_emails: options[:disable_emails] disable_emails: options[:disable_emails],
factory: BackupRestore::Factory.new(user_id: Discourse.system_user.id)
) )
restorer.run restorer.run
puts 'Restore done.' puts 'Restore done.'

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true
# Migration that removes the via_email and raw_email columns from posts.
#
# DROPPED_COLUMNS declares, per table, the columns this migration drops.
# NOTE(review): this looks like the fixture read by the "readonly functions"
# examples of the DatabaseRestorer spec - confirm before relying on it.
class DropPostColumns < ActiveRecord::Migration[5.2]
  DROPPED_COLUMNS ||= { posts: %i[via_email raw_email] }

  # Issues both column removals and then raises ActiveRecord::Rollback.
  def up
    %i[via_email raw_email].each do |column|
      remove_column :posts, column
    end

    raise ActiveRecord::Rollback
  end

  # Reverting is intentionally unsupported.
  def down
    raise "not tested"
  end
end

View File

@ -1,6 +1,8 @@
# frozen_string_literal: true # frozen_string_literal: true
class DropEmailLogsTable < ActiveRecord::Migration[5.2] class DropEmailLogsTable < ActiveRecord::Migration[5.2]
DROPPED_TABLES ||= %i{email_logs}
def up def up
drop_table :email_logs drop_table :email_logs
raise ActiveRecord::Rollback raise ActiveRecord::Rollback

10
spec/fixtures/db/restore/error.sql vendored Normal file
View File

@ -0,0 +1,10 @@
--
-- PostgreSQL database dump
--
-- Dumped from database version 10.11 (Debian 10.11-1.pgdg100+1)
-- Dumped by pg_dump version 10.11 (Debian 10.11-1.pgdg100+1)
-- Started on 2019-12-28 00:24:29 UTC
-- The statement below is the whole point of this fixture: "foo" is not a
-- configuration parameter, so executing it fails. The DatabaseRestorer spec
-- restores this file and expects BackupRestore::DatabaseRestoreError.
SET foo = 0;

View File

@ -0,0 +1,31 @@
--
-- PostgreSQL database dump
--
-- Dumped from database version 10.11 (Debian 10.11-1.pgdg100+1)
-- Dumped by pg_dump version 10.11 (Debian 10.11-1.pgdg100+1)
-- Started on 2019-12-28 00:24:29 UTC
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;
SET default_tablespace = '';
SET default_with_oids = false;
--
-- TOC entry 198 (class 1259 OID 16573)
-- Name: foo; Type: TABLE; Schema: public; Owner: -
CREATE TABLE public.foo (
id integer NOT NULL
);

View File

@ -0,0 +1,49 @@
--
-- PostgreSQL database dump
--
-- Dumped from database version 11.6 (Debian 11.6-1.pgdg90+1)
-- Dumped by pg_dump version 11.6 (Debian 11.6-1.pgdg90+1)
-- Started on 2019-12-28 00:38:51 UTC
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;
--
-- TOC entry 5 (class 2615 OID 2200)
-- Name: public; Type: SCHEMA; Schema: -; Owner: -
--
CREATE SCHEMA public;
--
-- TOC entry 4782 (class 0 OID 0)
-- Dependencies: 5
-- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: -
--
COMMENT ON SCHEMA public IS 'standard public schema';
SET default_tablespace = '';
SET default_with_oids = false;
--
-- TOC entry 198 (class 1259 OID 16585)
-- Name: foo; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.foo (
id integer NOT NULL
);

View File

@ -0,0 +1,49 @@
--
-- PostgreSQL database dump
--
-- Dumped from database version 12.1 (Debian 12.1-1.pgdg100+1)
-- Dumped by pg_dump version 12.1 (Debian 12.1-1.pgdg100+1)
-- Started on 2019-12-28 00:35:48 UTC
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;
--
-- TOC entry 5 (class 2615 OID 2200)
-- Name: public; Type: SCHEMA; Schema: -; Owner: -
--
CREATE SCHEMA public;
--
-- TOC entry 4825 (class 0 OID 0)
-- Dependencies: 5
-- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: -
--
COMMENT ON SCHEMA public IS 'standard public schema';
SET default_tablespace = '';
SET default_table_access_method = heap;
--
-- TOC entry 204 (class 1259 OID 16587)
-- Name: foo; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.foo (
id integer NOT NULL
);

View File

@ -0,0 +1,29 @@
--
-- PostgreSQL database dump
--
-- Dumped from database version 9.3.11
-- Dumped by pg_dump version 9.3.11
-- Started on 2019-12-27 20:54:40 UTC
SET statement_timeout = 0;
SET lock_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SET check_function_bodies = false;
SET client_min_messages = warning;
DROP SCHEMA IF EXISTS restore CASCADE; CREATE SCHEMA restore; SET search_path = restore, public, pg_catalog;
SET default_tablespace = '';
SET default_with_oids = false;
--
-- TOC entry 274 (class 1259 OID 18691)
-- Name: foo; Type: TABLE; Schema: public; Owner: -; Tablespace:
--
CREATE TABLE foo (
id integer NOT NULL
);

View File

@ -0,0 +1,31 @@
--
-- PostgreSQL database dump
--
-- Dumped from database version 9.5.10
-- Dumped by pg_dump version 9.5.19
-- Started on 2019-12-27 16:08:01 UTC
SET statement_timeout = 0;
SET lock_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;
SET default_tablespace = '';
SET default_with_oids = false;
--
-- TOC entry 285 (class 1259 OID 18706)
-- Name: foo; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.foo (
id integer NOT NULL
);

View File

@ -0,0 +1,31 @@
--
-- PostgreSQL database dump
--
-- Dumped from database version 9.5.5
-- Dumped by pg_dump version 9.5.5
-- Started on 2019-11-07 16:41:33 UTC
SET statement_timeout = 0;
SET lock_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SET check_function_bodies = false;
SET client_min_messages = warning;
SET row_security = off;
SET search_path = public, pg_catalog;
SET default_tablespace = '';
SET default_with_oids = false;
--
-- TOC entry 284 (class 1259 OID 18697)
-- Name: foo; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE foo (
id integer NOT NULL
);

View File

@ -0,0 +1,77 @@
# frozen_string_literal: true
require 'rails_helper'
require_relative 'shared_context_for_backup_restore'
# Verifies that BackupRestore::BackupFileHandler decompresses each supported
# backup archive layout into a temporary directory and that clean_up removes
# that directory again.
describe BackupRestore::BackupFileHandler do
  include_context "shared stuff"

  # Copies the fixture archive into the local backup store, decompresses it
  # below a throwaway root directory and checks what was extracted.
  #
  # backup_filename        - archive name below spec/fixtures/backups
  # expected_dump_filename - expected file name of the extracted database dump
  # require_metadata_file  - whether meta.json must exist after decompressing
  # require_uploads        - true: the fixture upload must have been extracted;
  #                          false: no uploads directory may exist at all
  #
  # The copied archive is removed afterwards, even when an expectation fails.
  def expect_decompress_and_clean_up_to_work(backup_filename:, expected_dump_filename: "dump.sql",
                                             require_metadata_file:, require_uploads:)

    # The expected temporary path below ends in this timestamp, so the clock
    # has to be frozen for the path to be predictable.
    freeze_time(DateTime.parse('2019-12-24 14:31:48'))

    source_file = File.join(Rails.root, "spec/fixtures/backups", backup_filename)
    target_directory = BackupRestore::LocalBackupStore.base_directory
    target_file = File.join(target_directory, backup_filename)
    FileUtils.copy_file(source_file, target_file)

    Dir.mktmpdir do |root_directory|
      current_db = RailsMultisite::ConnectionManagement.current_db
      file_handler = BackupRestore::BackupFileHandler.new(logger, backup_filename, current_db, root_directory)
      tmp_directory, db_dump_path = file_handler.decompress

      expected_tmp_path = File.join(root_directory, "tmp/restores", current_db, "2019-12-24-143148")
      expect(tmp_directory).to eq(expected_tmp_path)
      expect(db_dump_path).to eq(File.join(expected_tmp_path, expected_dump_filename))

      expect(Dir.exist?(tmp_directory)).to eq(true)
      expect(File.exist?(db_dump_path)).to eq(true)
      expect(File.exist?(File.join(tmp_directory, "meta.json"))).to eq(require_metadata_file)

      if require_uploads
        upload_filename = "uploads/default/original/3X/b/d/bd269860bb508aebcb6f08fe7289d5f117830383.png"
        expect(File.exist?(File.join(tmp_directory, upload_filename))).to eq(true)
      else
        expect(Dir.exist?(File.join(tmp_directory, "uploads"))).to eq(false)
      end

      file_handler.clean_up
      expect(Dir.exist?(tmp_directory)).to eq(false)
    end
  ensure
    # rm_f (not rm): if the setup above failed before the archive was copied,
    # there is nothing to delete and raising here would hide the real error.
    FileUtils.rm_f(target_file) if target_file

    # We don't want to delete the directory unless it is empty, otherwise this could be annoying
    # when tests run for the "default" database in a development environment.
    begin
      FileUtils.rmdir(target_directory) if target_directory
    rescue SystemCallError
      # Directory is not empty (or is already gone) - leave it alone.
    end
  end

  it "works with old backup file format", type: :multisite do
    test_multisite_connection("second") do
      expect_decompress_and_clean_up_to_work(
        backup_filename: "backup_till_v1.5.tar.gz",
        require_metadata_file: true,
        require_uploads: true
      )
    end
  end

  it "works with current backup file format" do
    expect_decompress_and_clean_up_to_work(
      backup_filename: "backup_since_v1.6.tar.gz",
      require_metadata_file: false,
      require_uploads: true
    )
  end

  it "works with SQL only backup file" do
    expect_decompress_and_clean_up_to_work(
      backup_filename: "sql_only_backup.sql.gz",
      expected_dump_filename: "sql_only_backup.sql",
      require_metadata_file: false,
      require_uploads: false
    )
  end
end

View File

@ -0,0 +1,188 @@
# frozen_string_literal: true
require 'rails_helper'
require_relative 'shared_context_for_backup_restore'
# Specs for BackupRestore::DatabaseRestorer: the order of the restore steps,
# restoring real pg_dump fixtures through psql, reconnecting to the database,
# rolling back, and creating/dropping the readonly functions.
describe BackupRestore::DatabaseRestorer do
  include_context "shared stuff"

  let(:current_db) { RailsMultisite::ConnectionManagement.current_db }

  subject { BackupRestore::DatabaseRestorer.new(logger, current_db) }

  # Each expect_* helper returns the mocha expectation it sets up, so that
  # callers can chain further constraints such as `.in_sequence`.

  # Expects at least one readonly function to be created.
  def expect_create_readonly_functions
    Migration::BaseDropper.expects(:create_readonly_function).at_least_once
  end

  # Expects the tables to be moved from the "public" to the "backup" schema once.
  def expect_table_move
    BackupRestore.expects(:move_tables_between_schemas).with("public", "backup").once
  end

  # Replaces the psql invocation with mocks.
  #
  # output_lines - lines handed out by successive readline calls; a trailing
  #                nil is appended to them
  # exit_status  - value reported by Process.last_status.exitstatus
  # stub_thread  - when true, Thread.new returns a mock whose join does nothing
  #
  # Returns the expectation on IO.popen.
  def expect_psql(output_lines: ["output from psql"], exit_status: 0, stub_thread: false)
    status = mock("psql status")
    status.expects(:exitstatus).returns(exit_status).once
    Process.expects(:last_status).returns(status).once

    if stub_thread
      thread = mock("thread")
      thread.stubs(:join)
      Thread.stubs(:new).returns(thread)
    end

    # Build a new array instead of appending to the argument, so an array
    # passed in by the caller is left untouched.
    lines = output_lines + [nil]
    psql_io = mock("psql")
    psql_io.expects(:readline).returns(*lines).times(lines.size)
    IO.expects(:popen).yields(psql_io).once
  end

  # Expects a single "rake db:migrate" run from Rails.root with
  # SKIP_POST_DEPLOYMENT_MIGRATIONS set to "0".
  def expect_db_migrate
    Discourse::Utils.expects(:execute_command).with do |env, command, options|
      env["SKIP_POST_DEPLOYMENT_MIGRATIONS"] == "0" &&
        command == "rake db:migrate" &&
        options[:chdir] == Rails.root
    end.once
  end

  # Expects exactly one call to establish_connection.
  def expect_db_reconnect
    RailsMultisite::ConnectionManagement.expects(:establish_connection).once
  end

  # Runs subject.restore with the selected collaborators replaced by
  # expectations. The table move is always expected; every other step can be
  # left unstubbed by passing false for its flag.
  def execute_stubbed_restore(stub_readonly_functions: true, stub_psql: true, stub_migrate: true,
                              dump_file_path: "foo.sql")
    expect_table_move
    expect_create_readonly_functions if stub_readonly_functions
    expect_psql if stub_psql
    expect_db_migrate if stub_migrate
    subject.restore(dump_file_path)
  end

  describe "#restore" do
    it "executes everything in the correct order" do
      restore = sequence("restore")
      expect_table_move.in_sequence(restore)
      expect_create_readonly_functions.in_sequence(restore)
      expect_psql(stub_thread: true).in_sequence(restore)
      expect_db_migrate.in_sequence(restore)
      expect_db_reconnect.in_sequence(restore)

      subject.restore("foo.sql")
    end

    context "with real psql" do
      after do
        DB.exec <<~SQL
          -- Drop table and execute a commit to make the drop stick,
          -- otherwise rspec will rollback the drop at the end of each test.
          -- The tests in this context do not change the DB, so this should be safe.
          DROP TABLE IF EXISTS foo;
          COMMIT;
          -- Start a new transaction in order to suppress the
          -- "there is no transaction in progress" warnings from rspec.
          BEGIN TRANSACTION;
        SQL
      end

      # Restores a dump from spec/fixtures/db/restore without stubbing psql.
      def restore(filename, stub_migrate: true)
        path = File.join(Rails.root, "spec/fixtures/db/restore", filename)
        execute_stubbed_restore(stub_psql: false, stub_migrate: stub_migrate, dump_file_path: path)
      end

      # Every fixture dump creates a table named "foo"; its existence proves
      # that the dump was executed.
      def expect_restore_to_work(filename)
        restore(filename, stub_migrate: true)
        expect(ActiveRecord::Base.connection.table_exists?("foo")).to eq(true)
      end

      it "restores from PostgreSQL 9.3" do
        # this covers the defaults of Discourse v1.0 up to v1.5
        expect_restore_to_work("postgresql_9.3.11.sql")
      end

      it "restores from PostgreSQL 9.5.5" do
        # it uses a slightly different header than later 9.5.x versions
        expect_restore_to_work("postgresql_9.5.5.sql")
      end

      it "restores from PostgreSQL 9.5" do
        # this covers the defaults of Discourse v1.6 up to v1.9
        expect_restore_to_work("postgresql_9.5.10.sql")
      end

      it "restores from PostgreSQL 10" do
        # this covers the defaults of Discourse v1.7 up to v2.4
        expect_restore_to_work("postgresql_10.11.sql")
      end

      it "restores from PostgreSQL 11" do
        expect_restore_to_work("postgresql_11.6.sql")
      end

      it "restores from PostgreSQL 12" do
        expect_restore_to_work("postgresql_12.1.sql")
      end

      it "detects error during restore" do
        # No migration is expected here because the restore fails first.
        expect { restore("error.sql", stub_migrate: false) }
          .to raise_error(BackupRestore::DatabaseRestoreError)
      end
    end

    context "database connection" do
      it 'reconnects to the correct database', type: :multisite do
        RailsMultisite::ConnectionManagement.establish_connection(db: 'second')
        execute_stubbed_restore
        expect(RailsMultisite::ConnectionManagement.current_db).to eq('second')
      end

      it 'does not raise an error for non-multisite' do
        expect { execute_stubbed_restore }.not_to raise_error
      end
    end
  end

  describe "#rollback" do
    it "moves tables back when tables were moved" do
      BackupRestore.stubs(:can_rollback?).returns(true)

      # Nothing has been restored yet, so rollback must not move any tables.
      BackupRestore.expects(:move_tables_between_schemas).with("backup", "public").never
      subject.rollback

      execute_stubbed_restore

      # After a restore the tables have to be moved back exactly once.
      BackupRestore.expects(:move_tables_between_schemas).with("backup", "public").once
      subject.rollback
    end
  end

  context "readonly functions" do
    before do
      Migration::SafeMigrate.stubs(:post_migration_path).returns("spec/fixtures/db/post_migrate")
    end

    it "doesn't try to drop function when no functions have been created" do
      Migration::BaseDropper.expects(:drop_readonly_function).never
      subject.clean_up
    end

    it "creates and drops all functions when none exist" do
      Migration::BaseDropper.expects(:create_readonly_function).with(:email_logs, nil)
      Migration::BaseDropper.expects(:create_readonly_function).with(:posts, :via_email)
      Migration::BaseDropper.expects(:create_readonly_function).with(:posts, :raw_email)
      execute_stubbed_restore(stub_readonly_functions: false)

      Migration::BaseDropper.expects(:drop_readonly_function).with(:email_logs, nil)
      Migration::BaseDropper.expects(:drop_readonly_function).with(:posts, :via_email)
      Migration::BaseDropper.expects(:drop_readonly_function).with(:posts, :raw_email)
      subject.clean_up
    end

    it "creates and drops only missing functions during restore" do
      # Two of the three functions are reported as existing, so only the
      # posts.via_email one may be created and later dropped.
      Migration::BaseDropper.stubs(:existing_discourse_function_names)
        .returns(%w(raise_email_logs_readonly raise_posts_raw_email_readonly))

      Migration::BaseDropper.expects(:create_readonly_function).with(:posts, :via_email)
      execute_stubbed_restore(stub_readonly_functions: false)

      Migration::BaseDropper.expects(:drop_readonly_function).with(:posts, :via_email)
      subject.clean_up
    end
  end
end

View File

@ -0,0 +1,81 @@
# frozen_string_literal: true
require 'rails_helper'
require_relative 'shared_context_for_backup_restore'
# Specs for BackupRestore::MetaDataHandler#validate: the backup version is
# taken from the metadata file when one exists and from the backup's file
# name otherwise; invalid or too new versions raise.
describe BackupRestore::MetaDataHandler do
  include_context "shared stuff"

  let!(:backup_filename) { 'discourse-2019-11-18-143242-v20191108000414.tar.gz' }

  # Yields a fresh temporary directory. Unless +content+ is nil, a metadata
  # file holding +content+ is written into it first.
  def with_metadata_file(content)
    Dir.mktmpdir do |tmp_directory|
      unless content.nil?
        metadata_path = File.join(tmp_directory, BackupRestore::MetaDataHandler::METADATA_FILE)
        File.write(metadata_path, content)
      end

      yield(tmp_directory)
    end
  end

  # Builds a handler for the given backup file name and directory and returns
  # the result of its validation.
  def validate_metadata(filename, tmp_directory)
    handler = BackupRestore::MetaDataHandler.new(logger, filename, tmp_directory)
    handler.validate
  end

  it "extracts metadata from file when metadata file exists" do
    with_metadata_file('{"source":"discourse","version":20160329101122}') do |tmp_directory|
      metadata = validate_metadata(backup_filename, tmp_directory)
      expect(metadata).to include(version: 20160329101122)
    end
  end

  it "extracts metadata from filename when metadata file does not exist" do
    with_metadata_file(nil) do |tmp_directory|
      metadata = validate_metadata(backup_filename, tmp_directory)
      expect(metadata).to include(version: 20191108000414)
    end
  end

  it "raises an exception when the metadata file contains invalid JSON" do
    # The closing brace is missing on purpose.
    corrupt_metadata = '{"version":20160329101122'

    with_metadata_file(corrupt_metadata) do |tmp_directory|
      expect do
        validate_metadata(backup_filename, tmp_directory)
      end.to raise_error(BackupRestore::MetaDataError)
    end
  end

  it "raises an exception when the metadata file is empty" do
    with_metadata_file('') do |tmp_directory|
      expect do
        validate_metadata(backup_filename, tmp_directory)
      end.to raise_error(BackupRestore::MetaDataError)
    end
  end

  it "raises an exception when the filename contains no version number" do
    expect do
      validate_metadata('discourse-2019-11-18-143242.tar.gz', nil)
    end.to raise_error(BackupRestore::MetaDataError)
  end

  it "raises an exception when the filename contains an invalid version number" do
    expect do
      validate_metadata('discourse-2019-11-18-143242-v123456789.tar.gz', nil)
    end.to raise_error(BackupRestore::MetaDataError)
  end

  it "raises an exception when the backup's version is newer than the current version" do
    BackupRestore.expects(:current_version).returns(20191025005204).once

    expect do
      validate_metadata('discourse-2019-11-18-143242-v20191113193141.sql.gz', nil)
    end.to raise_error(BackupRestore::MigrationRequiredError)
  end
end

View File

@ -2,135 +2,6 @@
require 'rails_helper' require 'rails_helper'
# Causes flakiness
describe BackupRestore::Restorer do describe BackupRestore::Restorer do
# Table-driven check of described_class.pg_produces_portable_dump?:
# each key is a PostgreSQL version string that is passed in, each value is
# the boolean the method is expected to return for it.
it 'detects which pg_dump output is restorable to different schemas' do
{
"9.6.7" => true,
"9.6.8" => false,
"9.6.9" => false,
"10.2" => true,
"10.3" => false,
"10.3.1" => false,
"10.4" => false,
"11" => false,
"11.4" => false,
"21" => false,
}.each do |key, value|
expect(described_class.pg_produces_portable_dump?(key)).to eq(value)
end
end
# Builds small backup archives on disk and checks that the restorer's
# decompress_archive and extract_dump steps produce the expected files in
# its temporary directory.
describe 'Decompressing a backup' do
  let!(:admin) { Fabricate(:admin) }

  before do
    SiteSetting.allow_restore = true
    @restore_path = File.join(Rails.root, "public", "backups", RailsMultisite::ConnectionManagement.current_db)
  end

  after do
    FileUtils.rm_rf @restore_path
    FileUtils.rm_rf @restorer.tmp_directory
  end

  context 'When there are uploads' do
    before do
      @restore_folder = "backup-#{SecureRandom.hex}"
      @temp_folder = "#{@restore_path}/#{@restore_folder}"
      FileUtils.mkdir_p("#{@temp_folder}/uploads")

      # Assemble <folder>.tar.gz containing a gzipped dump.sql and an
      # uploads directory with a single file.
      Dir.chdir(@restore_path) do
        File.write("#{@restore_folder}/dump.sql", 'This is a dump')
        Compression::Gzip.new.compress(@restore_folder, 'dump.sql')
        FileUtils.rm_rf("#{@restore_folder}/dump.sql")
        File.write("#{@restore_folder}/uploads/upload.txt", 'This is an upload')

        Compression::Tar.new.compress(@restore_path, @restore_folder)
      end

      Compression::Gzip.new.compress(@restore_path, "#{@restore_folder}.tar")
      FileUtils.rm_rf @temp_folder

      build_restorer("#{@restore_folder}.tar.gz")
    end

    it '#decompress_archive works correctly' do
      @restorer.decompress_archive

      expect(exists?("dump.sql.gz")).to eq(true)
      expect(exists?("uploads", directory: true)).to eq(true)
    end

    it '#extract_dump works correctly' do
      @restorer.decompress_archive
      @restorer.extract_dump

      expect(exists?('dump.sql')).to eq(true)
    end
  end

  context 'When restoring a single file' do
    before do
      FileUtils.mkdir_p(@restore_path)

      Dir.chdir(@restore_path) do
        File.write('dump.sql', 'This is a dump')
        Compression::Gzip.new.compress(@restore_path, 'dump.sql')
        FileUtils.rm_rf('dump.sql')
      end

      build_restorer('dump.sql.gz')
    end

    it '#extract_dump works correctly with a single file' do
      @restorer.extract_dump

      expect(exists?("dump.sql")).to eq(true)
    end
  end

  # Returns whether +relative_path+ exists below the restorer's temporary
  # directory, as a directory when +directory+ is true and as any file
  # system entry otherwise.
  def exists?(relative_path, directory: false)
    full_path = "#{@restorer.tmp_directory}/#{relative_path}"
    # File.exist? rather than the deprecated File.exists? alias, which newer
    # Ruby versions no longer provide.
    directory ? File.directory?(full_path) : File.exist?(full_path)
  end

  # Creates the restorer for +filename+ and copies the archive into its
  # temporary directory so that the decompression steps can run.
  def build_restorer(filename)
    @restorer = described_class.new(admin.id, filename: filename)
    @restorer.ensure_directory_exists(@restorer.tmp_directory)
    @restorer.copy_archive_to_tmp_directory
  end
end
# Checks the restorer's private reconnect_database method: after calling it
# the current database must be the one stored in @current_db, and calling it
# must not raise once the multisite settings were cleared.
context 'Database connection' do
let!(:admin) { Fabricate(:admin) }
before do
SiteSetting.allow_restore = true
# NOTE(review): these two methods are stubbed so that described_class.new(admin.id)
# works without a filename - presumably both run during initialization; confirm.
described_class.any_instance.stubs(ensure_we_have_a_filename: true)
described_class.any_instance.stubs(initialize_state: true)
end
after do
SiteSetting.allow_restore = false
described_class.any_instance.unstub(:ensure_we_have_a_filename)
described_class.any_instance.unstub(:initialize_state)
end
let(:conn) { RailsMultisite::ConnectionManagement }
let(:restorer) { described_class.new(admin.id) }
it 'correctly reconnects to database', type: :multisite do
restorer.instance_variable_set(:@current_db, 'second')
conn.establish_connection(db: 'second')
expect(RailsMultisite::ConnectionManagement.current_db).to eq('second')
# Overwrite the db_key in the connection pool's config before reconnecting;
# the expectation below requires the current database to be 'second' afterwards.
ActiveRecord::Base.connection_pool.spec.config[:db_key] = "incorrect_db"
restorer.send(:reconnect_database)
expect(RailsMultisite::ConnectionManagement.current_db).to eq('second')
end
it 'it is not erroring for non multisite', type: :multisite do
RailsMultisite::ConnectionManagement::clear_settings!
expect { restorer.send(:reconnect_database) }.not_to raise_error
end
end
end end

View File

@ -0,0 +1,9 @@
# frozen_string_literal: true
#
# Shared context for the backup/restore specs. It provides +logger+, an
# object whose +log+ method accepts a message plus an optional exception and
# does nothing with them.
shared_context "shared stuff" do
  silent_logger_class = Class.new do
    def log(message, ex = nil); end
  end

  let!(:logger) { silent_logger_class.new }
end

View File

@ -0,0 +1,152 @@
# frozen_string_literal: true
require 'rails_helper'
require_relative 'shared_context_for_backup_restore'
# Specs for BackupRestore::SystemInterface: readonly mode handling, the
# running marker, the shutdown signal listener and pausing / waiting for
# Sidekiq.
describe BackupRestore::SystemInterface do
  include_context "shared stuff"

  subject { BackupRestore::SystemInterface.new(logger) }

  context "readonly mode" do
    after do
      # Remove every readonly flag an example may have left behind in redis.
      Discourse::READONLY_KEYS.each { |key| $redis.del(key) }
    end

    describe "#enable_readonly_mode" do
      it "enables readonly mode" do
        Discourse.expects(:enable_readonly_mode).once
        subject.enable_readonly_mode
      end

      it "does not enable readonly mode when it is already in readonly mode" do
        Discourse.enable_readonly_mode
        Discourse.expects(:enable_readonly_mode).never
        subject.enable_readonly_mode
      end
    end

    describe "#disable_readonly_mode" do
      it "disables readonly mode" do
        Discourse.expects(:disable_readonly_mode).once
        subject.disable_readonly_mode
      end

      it "does not disable readonly mode when readonly mode was explicitly enabled" do
        Discourse.enable_readonly_mode
        Discourse.expects(:disable_readonly_mode).never
        subject.disable_readonly_mode
      end
    end
  end

  describe "#mark_restore_as_running" do
    it "calls BackupRestore.mark_as_running!" do
      BackupRestore.expects(:mark_as_running!).once
      subject.mark_restore_as_running
    end
  end

  describe "#mark_restore_as_not_running" do
    it "calls BackupRestore.mark_as_not_running!" do
      BackupRestore.expects(:mark_as_not_running!).once
      subject.mark_restore_as_not_running
    end
  end

  describe "#listen_for_shutdown_signal" do
    before { BackupRestore.mark_as_running! }

    after do
      BackupRestore.clear_shutdown_signal!
      BackupRestore.mark_as_not_running!
    end

    it "exits the process when shutdown signal is set" do
      # Joining the listener thread re-raises the SystemExit it terminated with.
      expect do
        thread = subject.listen_for_shutdown_signal
        BackupRestore.set_shutdown_signal!
        thread.join
      end.to raise_error(SystemExit)
    end
  end

  describe "#pause_sidekiq" do
    it "calls pause!" do
      Sidekiq.expects(:pause!).once
      subject.pause_sidekiq
    end
  end

  describe "#unpause_sidekiq" do
    it "calls unpause!" do
      Sidekiq.expects(:unpause!).once
      subject.unpause_sidekiq
    end
  end

  describe "#wait_for_sidekiq" do
    it "waits 6 seconds even when there are no running Sidekiq jobs" do
      subject.expects(:sleep).with(6).once
      subject.wait_for_sidekiq
    end

    context "with Sidekiq workers" do
      before { $redis.flushall }
      after { $redis.flushall }

      # Writes one Sidekiq process with a single busy worker into redis. The
      # worker's payload is a process_post job that belongs to +site_id+
      # (the current site by default) or, with +all_sites+, to all sites.
      def create_workers(site_id: nil, all_sites: false)
        $redis.flushall

        # Enqueue in fake mode only to obtain a job payload.
        payload = Sidekiq::Testing.fake! do
          data = { post_id: 1 }

          if all_sites
            data[:all_sites] = true
          else
            data[:current_site_id] = site_id || RailsMultisite::ConnectionManagement.current_db
          end

          Jobs.enqueue(:process_post, data)
          Jobs::ProcessPost.jobs.last
        end

        Sidekiq.redis do |conn|
          hostname = "localhost"
          pid = 7890
          key = "#{hostname}:#{pid}"
          process = { pid: pid, hostname: hostname }

          conn.sadd('processes', key)
          conn.hmset(key, 'info', Sidekiq.dump_json(process))

          data = Sidekiq.dump_json(
            queue: 'default',
            run_at: Time.now.to_i,
            payload: Sidekiq.dump_json(payload)
          )
          conn.hmset("#{key}:workers", '444', data)
        end
      end

      it "waits up to 60 seconds for jobs running for the current site to finish" do
        # 10 sleeps of 6 seconds each make up the 60 seconds.
        subject.expects(:sleep).with(6).times(10)
        create_workers
        expect { subject.wait_for_sidekiq }.to raise_error(BackupRestore::RunningSidekiqJobsError)
      end

      it "waits up to 60 seconds for jobs running on all sites to finish" do
        subject.expects(:sleep).with(6).times(10)
        create_workers(all_sites: true)
        expect { subject.wait_for_sidekiq }.to raise_error(BackupRestore::RunningSidekiqJobsError)
      end

      it "ignores jobs of other sites" do
        subject.expects(:sleep).with(6).once
        create_workers(site_id: "another_site")
        subject.wait_for_sidekiq
      end
    end
  end
end

View File

@ -0,0 +1,566 @@
# frozen_string_literal: true
require 'rails_helper'
require_relative 'shared_context_for_backup_restore'
describe BackupRestore::UploadsRestorer do
include_context "shared stuff"
subject { BackupRestore::UploadsRestorer.new(logger) }
# Creates "uploads/<name>" (and, when +with_optimized+ is true, an
# "optimized" directory inside it) below a fresh temporary directory and
# yields the temporary directory together with the "uploads/<name>" path.
# Returns whatever the block returns.
def with_temp_uploads_directory(name: "default", with_optimized: false)
  Dir.mktmpdir do |tmp_root|
    site_uploads = File.join(tmp_root, "uploads", name)
    FileUtils.mkdir_p(site_uploads)

    if with_optimized
      optimized_directory = File.join(site_uploads, "optimized")
      FileUtils.mkdir(optimized_directory)
    end

    yield(tmp_root, site_uploads)
  end
end
# Runs expect_remaps without passing any remaps, i.e. with its default
# (empty) list of explicitly expected remaps.
def expect_no_remap(source_site_name: nil, target_site_name:, metadata: [])
  site_options = {
    source_site_name: source_site_name,
    target_site_name: target_site_name,
    metadata: metadata
  }

  expect_remaps(**site_options)
end
# Runs expect_remaps with exactly one expected remap (+from+ => +to+) and
# forwards the optional block to it.
def expect_remap(source_site_name: nil, target_site_name:, metadata: [], from:, to:, &block)
  single_remap = { from: from, to: to }

  expect_remaps(
    source_site_name: source_site_name,
    target_site_name: target_site_name,
    metadata: metadata,
    remaps: [single_remap],
    &block
  )
end
# Restores uploads into +target_site_name+ and verifies the DbHelper.remap
# calls this triggers.
#
# source_site_name - site name used for the temporary uploads directory;
#                    defaults to the "db_name" metadata value or "default"
# target_site_name - site the restore runs for ("default" runs directly,
#                    anything else inside test_multisite_connection)
# metadata         - attribute hashes turned into BackupMetadata records
# remaps           - expected remaps as { from:, to: } hashes, in call order
#
# When source and target site differ, the rename of the uploads path is
# appended as the last expected remap. An optional block receives the
# temporary directory before the restore runs.
def expect_remaps(source_site_name: nil, target_site_name:, metadata: [], remaps: [], &block)
  # Work on a copy: the list is extended and consumed below and the caller's
  # array should not change as a side effect.
  remaps = remaps.dup

  source_site_name ||= metadata.find { |d| d[:name] == "db_name" }&.dig(:value) || "default"

  if source_site_name != target_site_name
    site_rename = { from: "/uploads/#{source_site_name}/", to: uploads_path(target_site_name) }
    remaps << site_rename unless remaps.last == site_rename
  end

  with_temp_uploads_directory(name: source_site_name, with_optimized: true) do |directory, path|
    yield(directory) if block_given?

    Discourse.store.class.any_instance.expects(:copy_from).with(path).once

    if remaps.blank?
      DbHelper.expects(:remap).never
    else
      DbHelper.expects(:remap).with do |from, to, args|
        # Both conditions have to hold. Previously the result of the
        # excluded_tables check was thrown away, so it was never verified.
        args[:excluded_tables]&.include?("backup_metadata") &&
          remaps.shift == { from: from, to: to }
      end.times(remaps.size)
    end

    if target_site_name == "default"
      setup_and_restore(directory, metadata)
    else
      test_multisite_connection(target_site_name) { setup_and_restore(directory, metadata) }
    end
  end
end
# Creates one BackupMetadata record per entry of +metadata+ and then restores
# the uploads found in +directory+. Returns the result of the restore call.
def setup_and_restore(directory, metadata)
  metadata.each do |attributes|
    BackupMetadata.create!(attributes)
  end

  subject.restore(directory)
end
# Returns the uploads path of the given site, wrapped in slashes.
# In parallel tests the test environment number ('1' when it is blank) is appended.
def uploads_path(database)
  segments = ["uploads", database]
  segments << (ENV['TEST_ENV_NUMBER'].presence || '1') if Discourse.is_parallel_test?

  "/#{File.join(*segments)}/"
end
context "uploads" do
# Attribute hashes (name/value pairs) that get passed as `metadata` to the
# `expect_*` helpers, which create BackupMetadata records from them.
let!(:multisite) { { name: "multisite", value: true } }
let!(:no_multisite) { { name: "multisite", value: false } }
let!(:source_db_name) { { name: "db_name", value: "foo" } }
let!(:base_url) { { name: "base_url", value: "https://www.example.com/forum" } }
let!(:no_cdn_url) { { name: "cdn_url", value: nil } }
let!(:cdn_url) { { name: "cdn_url", value: "https://some-cdn.example.com" } }
# Both depend on `target_site_type`, which is not defined here; nested contexts
# define it (or override these two values directly).
let(:target_site_name) { target_site_type == multisite ? "second" : "default" }
let(:target_hostname) { target_site_type == multisite ? "test2.localhost" : "test.localhost" }
# Expects `store_class` to be defined by the including context.
shared_context "no uploads" do
  it "does nothing when temporary uploads directory is missing or empty" do
    store_class.any_instance.expects(:copy_from).never

    Dir.mktmpdir do |tmp_root|
      # the "uploads" directory doesn't exist yet
      subject.restore(tmp_root)

      # the "uploads" directory exists, but it is empty
      FileUtils.mkdir(File.join(tmp_root, "uploads"))
      subject.restore(tmp_root)
    end
  end
end
# Examples that restore without creating any BackupMetadata records.
shared_examples "without metadata" do
  it "correctly remaps uploads" do
    expect_no_remap(target_site_name: "default")
  end

  it "correctly remaps when site name is different" do
    site_rename = { from: "/uploads/foo/", to: uploads_path("default") }

    expect_remap(source_site_name: "foo", target_site_name: "default", **site_rename)
  end
end
# Examples that check the side effects of restoring uploads.
# Expects `store_class` to be defined by the including context.
shared_context "restores uploads" do
before do
# Removes uploads with an id > 0, then creates an optimized image, an S3 upload
# that is referenced by a post and a user avatar imported from a fixture file.
Upload.where("id > 0").destroy_all
Fabricate(:optimized_image)
upload = Fabricate(:upload_s3)
post = Fabricate(:post, raw: "![#{upload.original_filename}](#{upload.short_url})")
post.link_post_uploads
FileHelper.stubs(:download).returns(file_from_fixtures("logo.png"))
# NOTE(review): Mocha's `returns` takes return values as arguments; the block given
# here is presumably never evaluated, which would make the stub return nil — confirm.
# If it were evaluated it would reference `target_site_type`, which the contexts
# including these examples don't define at this level.
FileStore::S3Store.any_instance.stubs(:store_upload).returns do
File.join(
"//s3-upload-bucket.s3.dualstack.us-east-1.amazonaws.com",
target_site_type == multisite ? "/uploads/#{target_site_name}" : "",
"original/1X/bc975735dfc6409c1c2aa5ebf2239949bcbdbd65.png"
)
end
UserAvatar.import_url_for_user("logo.png", Fabricate(:user))
end
it "successfully restores uploads" do
SiteIconManager.expects(:ensure_optimized!).once
# the temporary uploads directory doesn't contain an "optimized" folder
with_temp_uploads_directory do |directory, path|
store_class.any_instance.expects(:copy_from).with(path).once
expect { subject.restore(directory) }
.to change { OptimizedImage.count }.by_at_most(-1)
.and change { Jobs::CreateAvatarThumbnails.jobs.size }.by(1)
.and change { Post.where(baked_version: nil).count }.by(1)
end
end
it "doesn't generate optimized images when backup contains optimized images" do
SiteIconManager.expects(:ensure_optimized!).never
# the temporary uploads directory contains an "optimized" folder
with_temp_uploads_directory(with_optimized: true) do |directory, path|
store_class.any_instance.expects(:copy_from).with(path).once
expect { subject.restore(directory) }
.to change { OptimizedImage.count }.by(0)
.and change { Jobs::CreateAvatarThumbnails.jobs.size }.by(0)
.and change { Post.where(baked_version: nil).count }.by(1)
end
end
end
# Remaps of `base_url` and `cdn_url`.
# Expects `source_site_type` and `target_site_name` to be defined by the including context.
shared_examples "common remaps" do
  it "remaps when `base_url` changes" do
    Discourse.expects(:base_url).returns("http://localhost").at_least_once
    backup_metadata = [source_site_type, base_url]

    expect_remap(
      target_site_name: target_site_name,
      metadata: backup_metadata,
      from: "https://www.example.com/forum",
      to: "http://localhost"
    )
  end

  it "doesn't remap when `cdn_url` in `backup_metadata` is empty" do
    backup_metadata = [source_site_type, no_cdn_url]

    expect_no_remap(target_site_name: target_site_name, metadata: backup_metadata)
  end

  it "remaps to new `cdn_url` when `cdn_url` changes to a different value" do
    Discourse.expects(:asset_host).returns("https://new-cdn.example.com").at_least_once

    expected_remaps = [
      { from: "https://some-cdn.example.com/", to: "https://new-cdn.example.com/" },
      { from: "some-cdn.example.com", to: "new-cdn.example.com" }
    ]

    expect_remaps(
      target_site_name: target_site_name,
      metadata: [source_site_type, cdn_url],
      remaps: expected_remaps
    )
  end

  it "remaps to `base_url` when `cdn_url` changes to an empty value" do
    Discourse.expects(:base_url).returns("http://example.com/discourse").at_least_once
    Discourse.expects(:asset_host).returns(nil).at_least_once

    expected_remaps = [
      { from: "https://some-cdn.example.com/", to: "//example.com/discourse/" },
      { from: "some-cdn.example.com", to: "example.com" }
    ]

    expect_remaps(
      target_site_name: target_site_name,
      metadata: [source_site_type, cdn_url],
      remaps: expected_remaps
    )
  end
end
# Expects `source_site_type`, `target_site_name`, `s3_base_url` and `s3_cdn_url`
# to be defined by the including context.
shared_examples "remaps from local storage" do
  it "doesn't remap when `s3_base_url` in `backup_metadata` is empty" do
    backup_metadata = [source_site_type, s3_base_url]

    expect_no_remap(target_site_name: target_site_name, metadata: backup_metadata)
  end

  it "doesn't remap when `s3_cdn_url` in `backup_metadata` is empty" do
    backup_metadata = [source_site_type, s3_cdn_url]

    expect_no_remap(target_site_name: target_site_name, metadata: backup_metadata)
  end
end
# Restores into a site that stores uploads locally (S3 uploads are disabled).
context "currently stored locally" do
  before do
    SiteSetting.enable_s3_uploads = false
  end

  let!(:store_class) { FileStore::LocalStore }

  include_context "no uploads"
  include_context "restores uploads"

  context "remaps" do
    include_examples "without metadata"

    context "uploads previously stored locally" do
      let!(:s3_base_url) { { name: "s3_base_url", value: nil } }
      let!(:s3_cdn_url) { { name: "s3_cdn_url", value: nil } }

      context "from regular site" do
        let!(:source_site_type) { no_multisite }

        context "to regular site" do
          let!(:target_site_type) { no_multisite }

          include_examples "common remaps"
          include_examples "remaps from local storage"
        end

        context "to multisite", type: :multisite do
          let!(:target_site_type) { multisite }

          include_examples "common remaps"
          include_examples "remaps from local storage"
        end
      end

      context "from multisite" do
        let!(:source_site_type) { multisite }

        context "to regular site" do
          let!(:target_site_type) { no_multisite }

          include_examples "common remaps"
          include_examples "remaps from local storage"
        end

        context "to multisite", type: :multisite do
          let!(:target_site_type) { multisite }

          include_examples "common remaps"
          include_examples "remaps from local storage"
        end
      end
    end

    context "uploads previously stored on S3" do
      let!(:s3_base_url) { { name: "s3_base_url", value: "//old-bucket.s3-us-east-1.amazonaws.com" } }
      let!(:s3_cdn_url) { { name: "s3_cdn_url", value: "https://s3-cdn.example.com" } }

      shared_examples "regular site remaps from S3" do
        it "remaps when `s3_base_url` changes" do
          expect_remap(
            target_site_name: target_site_name,
            metadata: [no_multisite, s3_base_url],
            from: "//old-bucket.s3-us-east-1.amazonaws.com/",
            to: uploads_path(target_site_name)
          )
        end

        it "remaps when `s3_cdn_url` changes" do
          expect_remaps(
            target_site_name: target_site_name,
            metadata: [no_multisite, s3_cdn_url],
            remaps: [
              { from: "https://s3-cdn.example.com/", to: "//#{target_hostname}#{uploads_path(target_site_name)}" },
              { from: "s3-cdn.example.com", to: target_hostname }
            ]
          )
        end
      end

      shared_examples "multisite remaps from S3" do
        it "remaps when `s3_base_url` changes" do
          expect_remap(
            target_site_name: target_site_name,
            metadata: [source_db_name, multisite, s3_base_url],
            from: "//old-bucket.s3-us-east-1.amazonaws.com/",
            to: "/"
          )
        end

        it "remaps when `s3_cdn_url` changes" do
          expect_remaps(
            target_site_name: target_site_name,
            metadata: [source_db_name, multisite, s3_cdn_url],
            remaps: [
              { from: "https://s3-cdn.example.com/", to: "//#{target_hostname}/" },
              { from: "s3-cdn.example.com", to: target_hostname }
            ]
          )
        end
      end

      context "from regular site" do
        let!(:source_site_type) { no_multisite }

        context "to regular site" do
          let!(:target_site_type) { no_multisite }

          include_examples "common remaps"
          include_examples "regular site remaps from S3"
        end

        context "to multisite", type: :multisite do
          let!(:target_site_type) { multisite }

          include_examples "common remaps"
          include_examples "regular site remaps from S3"
        end
      end

      context "from multisite" do
        let!(:source_site_type) { multisite }

        context "to regular site" do
          let!(:target_site_type) { no_multisite }

          include_examples "common remaps"
          include_examples "multisite remaps from S3"
        end

        context "to multisite", type: :multisite do
          # The target has to be a multisite here, otherwise this context repeats
          # the "to regular site" examples and never restores into the second site.
          let!(:target_site_type) { multisite }

          include_examples "common remaps"
          include_examples "multisite remaps from S3"
        end
      end
    end
  end
end
# Restores into a site that stores uploads on S3.
context "currently stored on S3" do
  before do
    SiteSetting.s3_upload_bucket = "s3-upload-bucket"
    SiteSetting.s3_access_key_id = "s3-access-key-id"
    SiteSetting.s3_secret_access_key = "s3-secret-access-key"
    SiteSetting.enable_s3_uploads = true
  end

  let!(:store_class) { FileStore::S3Store }

  include_context "no uploads"
  include_context "restores uploads"

  context "remaps" do
    include_examples "without metadata"

    context "uploads previously stored locally" do
      let!(:s3_base_url) { { name: "s3_base_url", value: nil } }
      let!(:s3_cdn_url) { { name: "s3_cdn_url", value: nil } }

      context "from regular site" do
        let!(:source_site_type) { no_multisite }

        context "to regular site" do
          let!(:target_site_type) { no_multisite }

          include_examples "common remaps"
          include_examples "remaps from local storage"
        end

        context "to multisite", type: :multisite do
          # The target has to be a multisite here, otherwise this context repeats
          # the "to regular site" examples and never restores into the second site.
          let!(:target_site_type) { multisite }

          include_examples "common remaps"
          include_examples "remaps from local storage"
        end
      end

      context "from multisite" do
        let!(:source_site_type) { multisite }

        context "to regular site" do
          let!(:target_site_type) { no_multisite }

          include_examples "common remaps"
          include_examples "remaps from local storage"
        end

        context "to multisite", type: :multisite do
          let!(:target_site_type) { multisite }

          include_examples "common remaps"
          include_examples "remaps from local storage"
        end
      end
    end

    context "uploads previously stored on S3" do
      let!(:s3_base_url) { { name: "s3_base_url", value: "//old-bucket.s3-us-east-1.amazonaws.com" } }
      let!(:s3_cdn_url) { { name: "s3_cdn_url", value: "https://s3-cdn.example.com" } }

      shared_examples "regular site remaps from S3" do
        it "remaps when `s3_base_url` changes" do
          expect_remap(
            target_site_name: target_site_name,
            metadata: [no_multisite, s3_base_url],
            from: "//old-bucket.s3-us-east-1.amazonaws.com/",
            to: uploads_path(target_site_name)
          )
        end

        it "remaps when `s3_cdn_url` changes" do
          SiteSetting::Upload.expects(:s3_cdn_url).returns("https://new-s3-cdn.example.com").at_least_once

          expect_remaps(
            target_site_name: target_site_name,
            metadata: [no_multisite, s3_cdn_url],
            remaps: [
              { from: "https://s3-cdn.example.com/", to: "https://new-s3-cdn.example.com#{uploads_path(target_site_name)}" },
              { from: "s3-cdn.example.com", to: "new-s3-cdn.example.com" }
            ]
          )
        end
      end

      shared_examples "multisite remaps from S3" do
        it "remaps when `s3_base_url` changes" do
          expect_remap(
            target_site_name: target_site_name,
            metadata: [source_db_name, multisite, s3_base_url],
            from: "//old-bucket.s3-us-east-1.amazonaws.com/",
            to: "/"
          )
        end

        context "when `s3_cdn_url` is configured" do
          it "remaps when `s3_cdn_url` changes" do
            SiteSetting::Upload.expects(:s3_cdn_url).returns("http://new-s3-cdn.example.com").at_least_once

            expect_remaps(
              target_site_name: target_site_name,
              metadata: [source_db_name, multisite, s3_cdn_url],
              remaps: [
                { from: "https://s3-cdn.example.com/", to: "//new-s3-cdn.example.com/" },
                { from: "s3-cdn.example.com", to: "new-s3-cdn.example.com" }
              ]
            )
          end
        end

        context "when `s3_cdn_url` is not configured" do
          it "remaps to `base_url` when `s3_cdn_url` changes" do
            SiteSetting::Upload.expects(:s3_cdn_url).returns(nil).at_least_once

            expect_remaps(
              target_site_name: target_site_name,
              metadata: [source_db_name, multisite, s3_cdn_url],
              remaps: [
                { from: "https://s3-cdn.example.com/", to: "//#{target_hostname}/" },
                { from: "s3-cdn.example.com", to: target_hostname }
              ]
            )
          end
        end
      end

      context "from regular site" do
        let!(:source_site_type) { no_multisite }

        # These two contexts set the target site name and hostname directly
        # instead of deriving them from `target_site_type`.
        context "to regular site" do
          let!(:target_site_name) { "default" }
          let!(:target_hostname) { "test.localhost" }

          include_examples "common remaps"
          include_examples "regular site remaps from S3"
        end

        context "to multisite", type: :multisite do
          let!(:target_site_name) { "second" }
          let!(:target_hostname) { "test2.localhost" }

          include_examples "common remaps"
          include_examples "regular site remaps from S3"
        end
      end

      context "from multisite" do
        let!(:source_site_type) { multisite }

        context "to regular site" do
          let!(:target_site_type) { no_multisite }

          include_examples "common remaps"
          include_examples "multisite remaps from S3"
        end

        context "to multisite", type: :multisite do
          let!(:target_site_type) { multisite }

          include_examples "common remaps"
          include_examples "multisite remaps from S3"
        end
      end
    end
  end
end
end
it "raises an exception when the store doesn't support the copy_from method" do
  # a plain object doesn't respond to `copy_from`
  Discourse.stubs(:store).returns(Object.new)

  with_temp_uploads_directory do |tmp_root|
    expect do
      subject.restore(tmp_root)
    end.to raise_error(BackupRestore::UploadsRestoreError)
  end
end
it "raises an exception when there are multiple folders in the uploads directory" do
  with_temp_uploads_directory do |tmp_root|
    # adds a second folder next to the one created by the helper
    additional_folder = File.join(tmp_root, "uploads", "foo")
    FileUtils.mkdir_p(additional_folder)

    expect do
      subject.restore(tmp_root)
    end.to raise_error(BackupRestore::UploadsRestoreError)
  end
end
it "ignores 'PaxHeaders' and hidden directories within the uploads directory" do
  site_rename = { from: "/uploads/xylan/", to: uploads_path("default") }

  expect_remap(source_site_name: "xylan", target_site_name: "default", **site_rename) do |tmp_root|
    # additional folders next to the "xylan" folder created by the helper
    FileUtils.mkdir_p(File.join(tmp_root, "uploads", "PaxHeaders.27134"))
    FileUtils.mkdir_p(File.join(tmp_root, "uploads", ".hidden"))
  end
end
end