discourse/lib/backup_restore/restorer.rb

684 lines
22 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
require_dependency "db_helper"
module BackupRestore
2014-02-12 23:32:58 -05:00
# Raised by Restorer#ensure_restore_is_enabled when restoring is not allowed.
class RestoreDisabledError < RuntimeError
end
2014-02-12 23:32:58 -05:00
# Raised by Restorer#ensure_we_have_a_filename when no backup filename was given.
class FilenameMissingError < RuntimeError
end
class Restorer
attr_reader :success
# Tells whether the given pg_dump version produces a "portable" dump.
# The answer is used to decide if a dump can be restored into a different
# schema (see #can_restore_into_different_schema?).
#
# version - version string reported by pg_dump, e.g. "10.3" or "9.6.8"
#
# Returns false for major version 11 and above, and for every release of a
# pre-11 release line starting at the minor version listed below; true otherwise.
def self.pg_produces_portable_dump?(version)
  # every release from pg 11 onwards produces a non-portable dump
  return false if version.to_i >= 11

  # before 11 the behaviour changed in a different minor release of each
  # major release line; these are the first affected releases
  first_unportable_releases = %w[10.3 9.6.8 9.5.12 9.4.17 9.3.22]
  dumped_by = Gem::Version.new(version)

  first_unportable_releases.none? do |release|
    Gem::Dependency.new("", "~> #{release}").match?("", dumped_by)
  end
end
2017-07-27 21:20:09 -04:00
# Builds a restorer and verifies the preconditions for running it.
#
# user_id - id of the user who requested the restore
# opts    - Hash of options:
#           :client_id              - client id used when publishing log lines
#           :filename               - name of the backup file to restore
#           :publish_to_message_bus - publish log lines to the message bus (default: false)
#           :disable_emails         - restrict outgoing emails after the restore (default: true)
#
# Raises whatever the ensure_* precondition checks raise.
def initialize(user_id, opts = {})
  @user_id = user_id
  @client_id = opts[:client_id]
  @filename = opts[:filename]
  @publish_to_message_bus = opts[:publish_to_message_bus] || false
  @disable_emails = opts.fetch(:disable_emails, true)

  ensure_restore_is_enabled
  ensure_no_operation_is_running
  ensure_we_have_a_user
  ensure_we_have_a_filename

  initialize_state
end
# Runs the restore from start to finish.
#
# On any failure (including a cancellation via SystemExit) a rollback is
# attempted. Clean-up and the user notification always run, and @success is
# only set to true when no exception was raised.
def run
  log "[STARTED]"
  log "'#{@user_info[:username]}' has started the restore!"

  mark_restore_as_running
  listen_for_shutdown_signal

  ensure_directory_exists(@tmp_directory)
  copy_archive_to_tmp_directory
  decompress_archive

  extract_metadata
  validate_metadata
  extract_dump
  create_missing_discourse_functions

  if !can_restore_into_different_schema?
    log "Cannot restore into different schema, restoring in-place"
    enable_readonly_mode
    pause_sidekiq
    wait_for_sidekiq
    BackupRestore.move_tables_between_schemas("public", "backup")
    @db_was_changed = true
    restore_dump
  else
    log "Restoring into 'backup' schema"
    restore_dump
    enable_readonly_mode
    pause_sidekiq
    wait_for_sidekiq
    switch_schema!
  end

  migrate_database
  reconnect_database
  reload_site_settings
  clear_emoji_cache
  disable_readonly_mode
  clear_theme_cache

  extract_uploads
  after_restore_hook
rescue Compression::Strategy::ExtractFailed
  # the size limit used while decompressing comes from the backup setting (see #available_size)
  log "The uncompressed file is too big. Consider increasing the decompressed_backup_max_file_size_mb hidden setting."
  rollback
rescue SystemExit
  log "Restore process was cancelled!"
  rollback
rescue => ex
  log "EXCEPTION: " + ex.message
  # backtrace is nil for exceptions that were never raised
  log Array(ex.backtrace).join("\n")
  rollback
else
  @success = true
ensure
  clean_up
  notify_user
  log "Finished!"

  @success ? log("[SUCCESS]") : log("[FAILED]")
end
### The methods listed below are public just for testing purposes.
### This is not a good practice, but we need to be sure that our new compression API will work.
attr_reader :tmp_directory
# Creates the given directory, including missing parents, when needed.
#
# directory - path of the directory that has to exist
def ensure_directory_exists(directory)
  log("Making sure #{directory} exists...")
  FileUtils.mkdir_p(directory)
end
# Fetches the backup file from the store into the tmp directory.
# Only the wording of the log and failure messages depends on whether
# the store is remote.
def copy_archive_to_tmp_directory
  remote = @store.remote?

  log(remote ? "Downloading archive to tmp directory..." : "Copying archive to tmp directory...")

  failure_message =
    remote ? "Failed to download archive to tmp directory." : "Failed to copy archive to tmp directory."

  @store.download_file(@filename, @archive_filename, failure_message)
end
# Decompresses the archive into the tmp directory through a tar + gzip
# pipeline, then strips the directory the pipeline extracted into.
# Does nothing when the backup is not an archive.
def decompress_archive
  if @is_archive
    log "Unzipping archive, this may take a while..."

    strategies = [Compression::Tar.new, Compression::Gzip.new]
    pipeline = Compression::Pipeline.new(strategies)

    extracted_path = pipeline.decompress(@tmp_directory, @archive_filename, available_size)
    pipeline.strip_directory(extracted_path, @tmp_directory)
  end
end
# Loads the backup metadata into @metadata.
#
# When the metadata file exists in the tmp directory it is parsed with Oj.
# Otherwise the migration version is taken from the backup filename.
#
# Raises RuntimeError when the metadata file cannot be loaded or when the
# filename does not contain a version.
def extract_metadata
  metadata_path = File.join(@tmp_directory, BackupRestore::METADATA_FILE)

  @metadata =
    if File.exist?(metadata_path)
      # load the file whose existence was just checked
      data = Oj.load_file(metadata_path)
      raise "Failed to load metadata file." if !data
      data
    else
      log "No metadata file to extract."
      if @filename =~ /-#{BackupRestore::VERSION_PREFIX}(\d{14})/
        { "version" => Regexp.last_match[1].to_i }
      else
        raise "Migration version is missing from the filename."
      end
    end
end
# Determines the path of the dump file (@dump_filename) and gunzips it.
#
# For archives the old dump filename is preferred when that file exists;
# otherwise the backup file itself is the dump.
def extract_dump
  @dump_filename =
    if @is_archive
      # For backwards compatibility
      old_dump_path = File.join(@tmp_directory, BackupRestore::OLD_DUMP_FILE)
      # File.exists? was removed in Ruby 3.2
      File.exist?(old_dump_path) ? old_dump_path : File.join(@tmp_directory, BackupRestore::DUMP_FILE)
    else
      File.join(@tmp_directory, @filename)
    end

  log "Extracting dump file..."

  Compression::Gzip.new.decompress(@tmp_directory, @dump_filename, available_size)
end
2014-02-12 23:32:58 -05:00
protected
# Size limit handed to the decompression calls; read from the
# decompressed_backup_max_file_size_mb site setting.
def available_size
SiteSetting.decompressed_backup_max_file_size_mb
end
# Raises BackupRestore::RestoreDisabledError unless running in development
# or the allow_restore site setting is on.
def ensure_restore_is_enabled
  return if Rails.env.development? || SiteSetting.allow_restore?

  raise BackupRestore::RestoreDisabledError
end
# Raises BackupRestore::OperationRunningError when another backup/restore
# operation is reported as running.
def ensure_no_operation_is_running
  return unless BackupRestore.is_operation_running?

  raise BackupRestore::OperationRunningError
end
# Looks up the requesting user and remembers a few attributes in @user_info.
#
# Raises Discourse::InvalidParameters when no user has the id @user_id.
def ensure_we_have_a_user
  user = User.find_by(id: @user_id)
  raise Discourse::InvalidParameters.new(:user_id) unless user

  # keep some user data around to check them against the newly restored database
  @user_info = { id: user.id, username: user.username, email: user.email }
end
# Raises BackupRestore::FilenameMissingError when no filename was given.
def ensure_we_have_a_filename
  raise BackupRestore::FilenameMissingError if @filename.nil?
end
# Sets up the instance state used by the restore: the backup store, the
# current database and version, the timestamped tmp directory and the
# paths derived from it, plus bookkeeping used for rollback and clean-up.
def initialize_state
  @success = false
  @store = BackupRestore::BackupStore.create
  @db_was_changed = false
  @current_db = RailsMultisite::ConnectionManagement.current_db
  @current_version = BackupRestore.current_version
  @timestamp = Time.now.strftime("%Y-%m-%d-%H%M%S")
  @tmp_directory = File.join(Rails.root, "tmp", "restores", @current_db, @timestamp)
  @archive_filename = File.join(@tmp_directory, @filename)
  @tar_filename = @archive_filename[0...-3]
  # dots are escaped so that only a real ".sql.gz" extension counts as a plain dump
  @is_archive = !(@filename =~ /\.sql\.gz$/)

  @logs = []
  @readonly_mode_was_enabled = Discourse.readonly_mode?
  @created_functions_for_table_columns = []
end
# Starts a thread that polls every 0.1s, for as long as an operation is
# marked as running, and calls exit once a shutdown was requested.
#
# Returns the Thread.
def listen_for_shutdown_signal
  Thread.new do
    loop do
      break unless BackupRestore.is_operation_running?
      exit if BackupRestore.should_shutdown?
      sleep 0.1
    end
  end
end
# Flags the restore as the currently running operation.
def mark_restore_as_running
  log "Marking restore as running..."
  BackupRestore.mark_as_running!
end
# Turns readonly mode on, unless it was already on when the restorer was
# initialized.
def enable_readonly_mode
  return if @readonly_mode_was_enabled

  log "Enabling readonly mode..."
  Discourse.enable_readonly_mode
end
# Pauses Sidekiq.
def pause_sidekiq
  log("Pausing sidekiq...")
  Sidekiq.pause!
end
# Waits until Sidekiq has no running job for this site, checking every
# 5 seconds.
#
# Raises RuntimeError when jobs are still running after the allowed number
# of checks.
def wait_for_sidekiq
  log "Waiting for sidekiq to finish running jobs..."

  iterations = 1
  while sidekiq_has_running_jobs?
    log "Waiting for sidekiq to finish running jobs... ##{iterations}"
    sleep 5
    iterations += 1
    raise "Sidekiq did not finish running all the jobs in the allowed time!" if iterations > 6
  end
end
# Checks whether Sidekiq is currently running a job whose payload targets
# all sites or the current database.
#
# Returns true or false.
def sidekiq_has_running_jobs?
  Sidekiq::Workers.new.each do |_, _, work|
    job = work.try(:payload)
    concerns_this_site = job.try(:all_sites) || job.try(:current_site_id) == @current_db
    return true if concerns_this_site
  end

  false
end
2014-02-12 23:32:58 -05:00
# Makes sure metadata was extracted and that the backup's schema version is
# not newer than the current one.
#
# Raises RuntimeError when either check fails.
def validate_metadata
  log "Validating metadata..."
  log " Current version: #{@current_version}"

  raise "Metadata has not been extracted correctly." if !@metadata

  log " Restored version: #{@metadata["version"]}"

  error = "You're trying to restore a more recent version of the schema. You should migrate first!"
  raise error if @metadata["version"] > @current_version
end
# Reads the version of pg_dump that produced the dump file from the
# "Dumped by pg_dump version" line of the dump. zgrep is used for .gz
# files, grep otherwise.
#
# Returns the version as a String, e.g. "10.3".
# Raises RuntimeError when the output contains no version.
def get_dumped_by_version
  output = Discourse::Utils.execute_command(
    File.extname(@dump_filename) == '.gz' ? 'zgrep' : 'grep',
    '-m1', @dump_filename, '-e', "-- Dumped by pg_dump version",
    failure_message: "Failed to check version of pg_dump used to generate the dump file"
  )

  version = output.to_s[/version (\d+(\.\d+)+)/, 1]
  # fail with a readable message instead of a NoMethodError on nil
  raise "Failed to check version of pg_dump used to generate the dump file" unless version

  version
end
2018-03-09 01:28:50 -05:00
# Tells whether the dump was produced by a pg_dump version that creates a
# portable dump, so that it can be restored into a different schema.
def can_restore_into_different_schema?
  self.class.pg_produces_portable_dump?(get_dumped_by_version)
end
# Builds the shell command that feeds the dump to psql.
#
# When the dump filename ends in .gz, the file without that extension is
# piped through sed (see #sed_command); otherwise the dump is passed to psql
# as-is.
#
# Returns the command as a String.
def restore_dump_command
  if File.extname(@dump_filename) == '.gz'
    # only strip the trailing extension, not every ".gz" found in the path
    "#{sed_command} #{@dump_filename.sub(/\.gz\z/, '')} | #{psql_command} 2>&1"
  else
    "#{psql_command} 2>&1 < #{@dump_filename}"
  end
end
2014-02-12 23:32:58 -05:00
# Runs the restore command and logs every non-empty line it prints.
#
# Output lines are handed to a logging thread through a queue. The thread
# is joined before the error flag is checked, so every line has been
# inspected by then.
#
# Raises RuntimeError ("psql failed") when any output line contains "ERROR:".
def restore_dump
  log "Restoring dump file... (can be quite long)"

  logs = Queue.new
  has_error = false

  log_thread = Thread.new do
    RailsMultisite::ConnectionManagement.establish_connection(db: @current_db)

    # nil marks the end of the output
    while (line = logs.pop)
      message = line.strip
      has_error ||= message.include?("ERROR:")
      log(message) unless message.empty?
    end
  end

  IO.popen(restore_dump_command) do |pipe|
    begin
      pipe.each_line { |line| logs << line }
    ensure
      logs << nil
    end
  end

  log_thread.join

  # psql does not return a valid exit code when an error happens
  raise "psql failed" if has_error
end
# Builds the psql invocation from the database configuration.
# Password, host, port and username are only added when present.
#
# Returns the command as a String.
def psql_command
  db_conf = BackupRestore.database_configuration

  password_argument = "PGPASSWORD='#{db_conf.password}'" if db_conf.password.present?
  host_argument     = "--host=#{db_conf.host}"           if db_conf.host.present?
  port_argument     = "--port=#{db_conf.port}"           if db_conf.port.present?
  username_argument = "--username=#{db_conf.username}"   if db_conf.username.present?

  [ password_argument,                # pass the password to psql (if any)
    "psql",                           # the psql command
    "--dbname='#{db_conf.database}'", # connect to database *dbname*
    "--single-transaction",           # all or nothing (also runs COPY commands faster)
    host_argument,                    # the hostname to connect to (if any)
    port_argument,                    # the port to connect to (if any)
    username_argument                 # the username to connect as (if any)
  ].join(" ")
end
# Builds the sed command that rewrites the dump so the data is restored
# into the "restore" schema, which limits the downtime during a restore.
#
# The first search_path statement of the dump is replaced by statements that
#   - drop the "restore" schema if it exists
#   - create the "restore" schema
#   - prepend the "restore" schema to the search_path
#
# Returns the command as a String.
def sed_command
  search_path_statement = "SET search_path = public, pg_catalog;"

  restore_schema_statements = [
    "DROP SCHEMA IF EXISTS restore CASCADE;",
    "CREATE SCHEMA restore;",
    "SET search_path = restore, public, pg_catalog;",
  ]
  substitute = restore_schema_statements.join(" ")

  # the address range limits the substitution to the very first occurrence
  sed_script = "1,/^#{search_path_statement}$/s/#{search_path_statement}/#{substitute}/"

  "sed -e '#{sed_script}'"
end
2014-02-12 23:32:58 -05:00
# Swaps the schemas in one transaction: the tables of "public" are moved to
# "backup" and the tables of "restore" are moved to "public".
# Marks the database as changed before executing the statements.
def switch_schema!
  log "Switching schemas... try reloading the site in 5 minutes, if successful, then reboot and restore is complete."

  sql = [
    "BEGIN;",
    BackupRestore.move_tables_between_schemas_sql("public", "backup"),
    BackupRestore.move_tables_between_schemas_sql("restore", "public"),
    "COMMIT;"
  ].join("\n")

  @db_was_changed = true

  DB.exec(sql)
end
# Runs the db:migrate rake task up to the current version.
#
# When post deployment migrations are being skipped, the skip flag is reset
# and the post-migrate path is added to the migration paths first.
def migrate_database
  log "Migrating the database..."

  if Discourse.skip_post_deployment_migrations?
    ENV["SKIP_POST_DEPLOYMENT_MIGRATIONS"] = "0"
    Rails.application.config.paths['db/migrate'] << Rails.root.join(
      Discourse::DB_POST_MIGRATE_PATH
    ).to_s
  end

  Discourse::Application.load_tasks
  ENV["VERSION"] = @current_version.to_s
  DB.exec("SET search_path = public, pg_catalog;")
  Rake::Task["db:migrate"].invoke
end
# Reloads the multisite connection management (when an instance exists)
# and re-establishes the connection to the current database.
def reconnect_database
  log("Reconnecting to the database...")

  connection_management = RailsMultisite::ConnectionManagement
  connection_management.reload if connection_management.instance
  connection_management.establish_connection(db: @current_db)
end
# Refreshes the site settings and triggers the :site_settings_restored event.
#
# When emails should be disabled and the restored disable_emails setting is
# 'no', it is set to 'non-staff' on behalf of the requesting user, or of
# the system user when that user cannot be found by email.
def reload_site_settings
  log "Reloading site settings..."
  SiteSetting.refresh!

  DiscourseEvent.trigger(:site_settings_restored)

  if @disable_emails && SiteSetting.disable_emails == 'no'
    log "Disabling outgoing emails for non-staff users..."
    user = User.find_by_email(@user_info[:email]) || Discourse.system_user
    SiteSetting.set_and_log(:disable_emails, 'non-staff', user)
  end
end
2015-03-17 12:29:18 -04:00
# Clears the emoji cache.
def clear_emoji_cache
  log("Clearing emoji cache...")
  Emoji.clear_cache
end
2014-02-12 23:32:58 -05:00
# Restores the uploads contained in the backup, if any.
#
# The uploads are rsynced into the store's upload path (relative to the
# public directory) and upload URLs are remapped. When S3 uploads are
# enabled the files are migrated to S3 and the local copy is removed.
# Optimized images are regenerated when the backup did not contain them.
def extract_uploads
  # File.exists? was removed in Ruby 3.2
  return unless File.exist?(File.join(@tmp_directory, 'uploads'))
  log "Extracting uploads..."

  public_uploads_path = File.join(Rails.root, "public")
  upload_path = Discourse.store.upload_path

  FileUtils.mkdir_p(File.join(public_uploads_path, "uploads"))

  tmp_uploads_path = Dir.glob(File.join(@tmp_directory, "uploads", "*")).first
  return if tmp_uploads_path.blank?

  previous_db_name = BackupMetadata.value_for("db_name") || File.basename(tmp_uploads_path)
  optimized_images_exist = File.exist?(File.join(tmp_uploads_path, 'optimized'))

  Discourse::Utils.execute_command(
    'rsync', '-avp', '--safe-links', "#{tmp_uploads_path}/", "#{upload_path}/",
    failure_message: "Failed to restore uploads.",
    chdir: public_uploads_path
  )

  remap_uploads(previous_db_name, upload_path)

  if SiteSetting.Upload.enable_s3_uploads
    migrate_to_s3
    remove_local_uploads(File.join(public_uploads_path, upload_path))
  end

  generate_optimized_images unless optimized_images_exist
end
# Rewrites URLs stored in the database so they match the current site.
#
# The values recorded in the backup metadata (base URL, S3 base URL, S3 CDN
# URL, CDN URL) are compared with the current ones and remapped when they
# differ; the same is done for the uploads folder when the database name
# changed. Errors are logged and not re-raised.
#
# previous_db_name - database name the uploads were stored under in the backup
# upload_path      - upload path of the current store
def remap_uploads(previous_db_name, upload_path)
  log "Remapping uploads..."

  was_multisite = BackupMetadata.value_for("multisite") == "t"
  uploads_folder = was_multisite ? "/" : "/#{upload_path}/"

  if (old_base_url = BackupMetadata.value_for("base_url")) && old_base_url != Discourse.base_url
    remap(old_base_url, Discourse.base_url)
  end

  current_s3_base_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_base_url : nil
  # compare the S3 base URL of the backup (not the site base URL) with the current one
  if (old_s3_base_url = BackupMetadata.value_for("s3_base_url")) && old_s3_base_url != current_s3_base_url
    remap("#{old_s3_base_url}/", uploads_folder)
  end

  current_s3_cdn_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_cdn_url : nil
  if (old_s3_cdn_url = BackupMetadata.value_for("s3_cdn_url")) && old_s3_cdn_url != current_s3_cdn_url
    base_url = SiteSetting.Upload.enable_s3_uploads ? SiteSetting.Upload.s3_cdn_url : Discourse.base_url
    remap("#{old_s3_cdn_url}/", UrlHelper.schemaless("#{base_url}#{uploads_folder}"))

    old_host = URI.parse(old_s3_cdn_url).host
    new_host = URI.parse(base_url).host
    remap(old_host, new_host)
  end

  if (old_cdn_url = BackupMetadata.value_for("cdn_url")) && old_cdn_url != Discourse.asset_host
    base_url = Discourse.asset_host || Discourse.base_url
    remap("#{old_cdn_url}/", UrlHelper.schemaless("#{base_url}/"))

    old_host = URI.parse(old_cdn_url).host
    new_host = URI.parse(base_url).host
    remap(old_host, new_host)
  end

  current_db_name = RailsMultisite::ConnectionManagement.current_db
  if previous_db_name != current_db_name
    remap("uploads/#{previous_db_name}", upload_path)
  end

rescue => ex
  log "Something went wrong while remapping uploads.", ex
end
# Prints the remapping and delegates it to DbHelper, leaving the
# backup_metadata table out.
#
# from - text to replace
# to   - replacement text
def remap(from, to)
  puts("Remapping '#{from}' to '#{to}'")

  skipped_tables = ["backup_metadata"]
  DbHelper.remap(from, to, verbose: true, excluded_tables: skipped_tables)
end
# Invokes the uploads:migrate_to_s3 rake task with SKIP_FAILED set (and
# MIGRATE_TO_MULTISITE on multisite setups), then calls Jobs.run_later!.
def migrate_to_s3
  log("Migrating uploads to S3...")

  ENV["SKIP_FAILED"] = "1"
  if Rails.configuration.multisite
    ENV["MIGRATE_TO_MULTISITE"] = "1"
  end

  Rake::Task["uploads:migrate_to_s3"].invoke
  Jobs.run_later!
end
# Deletes the given uploads directory when it exists.
# Errors are logged and not re-raised.
#
# directory - path of the local uploads directory
def remove_local_uploads(directory)
  log("Removing local uploads directory...")

  directory_found = Dir[directory].present?
  FileUtils.rm_rf(directory) if directory_found
rescue => error
  log("Something went wrong while removing the following uploads directory: #{directory}", error)
end
# Rebuilds optimized images after a restore that did not include them:
# empties the optimized_images table, regenerates the site icons, resets
# baked_version on posts with uploads so they get rebaked, and enqueues
# avatar thumbnail jobs for users with an uploaded avatar.
def generate_optimized_images
  log 'Optimizing site icons...'
  DB.exec("TRUNCATE TABLE optimized_images")
  SiteIconManager.ensure_optimized!

  log 'Posts will be rebaked by a background job in sidekiq. You will see missing images until that has completed.'
  log 'You can expedite the process by manually running "rake posts:rebake_uncooked_posts"'

  DB.exec(<<~SQL)
    UPDATE posts
    SET baked_version = NULL
    WHERE id IN (SELECT post_id FROM post_uploads)
  SQL

  User.where("uploaded_avatar_id IS NOT NULL").find_each do |user|
    Jobs.enqueue(:create_avatar_thumbnails, upload_id: user.uploaded_avatar_id)
  end
end
2014-02-12 23:32:58 -05:00
# Moves the tables from the "backup" schema back to "public" when the
# database was changed and a rollback is possible.
def rollback
  log "Trying to rollback..."

  if @db_was_changed && BackupRestore.can_rollback?
    log "Rolling back..."
    BackupRestore.move_tables_between_schemas("backup", "public")
  else
    log "There was no need to rollback"
  end
end
# Sends the requesting user a system message with the outcome and the logs
# of the restore. The user is looked up by the email remembered before the
# restore. Errors are logged and not re-raised.
def notify_user
  recipient = User.find_by_email(@user_info[:email])

  if recipient
    log "Notifying '#{recipient.username}' of the end of the restore..."
    outcome = @success ? :restore_succeeded : :restore_failed
    pretty_logs = Discourse::Utils.pretty_logs(@logs)

    SystemMessage.create_from_system_user(recipient, outcome, logs: pretty_logs)
  else
    log "Could not send notification to '#{@user_info[:username]}' (#{@user_info[:email]}), because the user does not exists..."
  end
rescue => error
  log "Something went wrong while notifying user.", error
end
# Creates the readonly functions that post-deployment migrations declare
# through their DROPPED_TABLES and DROPPED_COLUMNS constants and that do
# not exist yet. Created functions are remembered in
# @created_functions_for_table_columns so they can be dropped afterwards.
def create_missing_discourse_functions
  log "Creating missing functions in the discourse_functions schema"

  readonly_targets = []

  Dir[Rails.root.join(Discourse::DB_POST_MIGRATE_PATH, "*.rb")].each do |migration_path|
    require migration_path

    # the migration class is named after the file, without the numeric prefix
    migration = File.basename(migration_path, ".rb").sub(/^\d+_/, "").camelize.constantize

    if migration.const_defined?(:DROPPED_TABLES)
      readonly_targets.concat(migration::DROPPED_TABLES.map { |table_name| [table_name] })
    end

    if migration.const_defined?(:DROPPED_COLUMNS)
      migration::DROPPED_COLUMNS.each do |table_name, column_names|
        readonly_targets.concat(column_names.map { |column_name| [table_name, column_name] })
      end
    end
  end

  known_functions = Migration::BaseDropper.existing_discourse_function_names.map { |name| "#{name}()" }

  readonly_targets.each do |table_name, column_name|
    function_name = Migration::BaseDropper.readonly_function_name(table_name, column_name, with_schema: false)
    next if known_functions.include?(function_name)

    Migration::BaseDropper.create_readonly_function(table_name, column_name)
    @created_functions_for_table_columns << [table_name, column_name]
  end
end
2014-02-12 23:32:58 -05:00
# Undoes the temporary changes made for the restore: drops the functions
# that were created, removes the tmp directory, unpauses Sidekiq, disables
# readonly mode when it is on, and marks the restore as finished.
def clean_up
  log "Cleaning stuff up..."
  drop_created_discourse_functions
  remove_tmp_directory
  unpause_sidekiq
  disable_readonly_mode if Discourse.readonly_mode?
  mark_restore_as_not_running
end
# Deletes the tmp directory of this restore when it exists.
# Errors are logged and not re-raised.
def remove_tmp_directory
  log "Removing tmp '#{@tmp_directory}' directory..."
  FileUtils.rm_rf(@tmp_directory) if Dir[@tmp_directory].present?
rescue => ex
  log "Something went wrong while removing the following tmp directory: #{@tmp_directory}", ex
end
# Unpauses Sidekiq. Errors are logged and not re-raised.
def unpause_sidekiq
  log "Unpausing sidekiq..."
  Sidekiq.unpause!
rescue => ex
  log "Something went wrong while unpausing Sidekiq.", ex
end
# Forces theme fields to be recompiled, expires the theme site cache and
# clears the stylesheet manager cache.
def clear_theme_cache
  log("Clear theme cache")

  ThemeField.force_recompilation!
  Theme.expire_site_cache!
  Stylesheet::Manager.cache.clear
end
# Drops every readonly function recorded in
# @created_functions_for_table_columns. Errors are logged and not re-raised.
def drop_created_discourse_functions
  log("Dropping function from the discourse_functions schema")

  @created_functions_for_table_columns.each do |(table_name, column_name)|
    Migration::BaseDropper.drop_readonly_function(table_name, column_name)
  end
rescue => error
  log("Something went wrong while dropping functions from the discourse_functions schema", error)
end
2014-02-12 23:32:58 -05:00
# Turns readonly mode off, unless it was already on when the restorer was
# initialized. Errors are logged and not re-raised.
def disable_readonly_mode
  return if @readonly_mode_was_enabled

  log "Disabling readonly mode..."
  Discourse.disable_readonly_mode
rescue => ex
  log "Something went wrong while disabling readonly mode.", ex
end
# Clears the "operation running" flag. Errors are logged and not re-raised.
def mark_restore_as_not_running
  log "Marking restore as finished..."
  BackupRestore.mark_as_not_running!
rescue => ex
  log "Something went wrong while marking restore as finished.", ex
end
# Triggers the :restore_complete event.
def after_restore_hook
  log("Executing the after_restore_hook...")
  DiscourseEvent.trigger(:restore_complete)
end
# Prints, publishes and stores a log line. Does nothing in the test
# environment.
#
# message - text to log
# ex      - optional exception; it is written with its backtrace to the
#           Rails logger
def log(message, ex = nil)
  return if Rails.env.test?

  timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S")
  puts(message)
  publish_log(message, timestamp)
  save_log(message, timestamp)
  # backtrace is nil for exceptions that were never raised
  Rails.logger.error("#{ex}\n" + Array(ex.backtrace).join("\n")) if ex
end
# Publishes a log line on the backup logs channel, addressed to the
# requesting user and client. Does nothing unless publishing was requested.
#
# message   - text of the log line
# timestamp - formatted time of the log line
def publish_log(message, timestamp)
  return unless @publish_to_message_bus

  data = { timestamp: timestamp, operation: "restore", message: message }
  MessageBus.publish(BackupRestore::LOGS_CHANNEL, data, user_ids: [@user_id], client_ids: [@client_id])
end
# Appends the log line, prefixed with its timestamp, to @logs.
#
# message   - text of the log line
# timestamp - formatted time of the log line
def save_log(message, timestamp)
  entry = "[#{timestamp}] #{message}"
  @logs << entry
end
2014-02-12 23:32:58 -05:00
end
end