DEV: Use rails_failover gem for ActiveRecord and Redis failover handling

This commit is contained in:
Guo Xiang Tan 2020-06-11 13:45:46 +08:00
parent 6780d4d70c
commit 58e52c0e4f
No known key found for this signature in database
GPG Key ID: FBD110179AAC1F20
11 changed files with 84 additions and 466 deletions

View File

@ -250,4 +250,4 @@ gem 'webpush', require: false
gem 'colored2', require: false
gem 'maxminddb'
gem 'rails_failover', require: false, git: 'https://github.com/discourse/rails_failover'
gem 'rails_failover', require: false

View File

@ -1,11 +1,3 @@
GIT
remote: https://github.com/discourse/rails_failover
revision: 66602aa73785851b81c506f0023d3c2a2e40de0a
specs:
rails_failover (0.4.0)
activerecord (~> 6.0)
railties (~> 6.0)
GEM
remote: https://rubygems.org/
specs:
@ -288,6 +280,9 @@ GEM
nokogiri (>= 1.6)
rails-html-sanitizer (1.3.0)
loofah (~> 2.3)
rails_failover (0.5.0)
activerecord (~> 6.0)
railties (~> 6.0)
rails_multisite (2.3.0)
activerecord (> 5.0, < 7)
railties (> 5.0, < 7)
@ -526,7 +521,7 @@ DEPENDENCIES
rack (= 2.2.2)
rack-mini-profiler
rack-protection
rails_failover!
rails_failover
rails_multisite
railties (= 6.0.3.1)
rake

View File

@ -134,12 +134,6 @@ class GlobalSetting
end
end
if hash["replica_host"]
if !ENV["ACTIVE_RECORD_RAILS_FAILOVER"]
hash["adapter"] = "postgresql_fallback"
end
end
hostnames = [ hostname ]
hostnames << backup_hostname if backup_hostname.present?
@ -170,15 +164,9 @@ class GlobalSetting
c[:port] = redis_port if redis_port
if redis_slave_host && redis_slave_port
if ENV["REDIS_RAILS_FAILOVER"]
c[:replica_host] = redis_slave_host
c[:replica_port] = redis_slave_port
c[:connector] = RailsFailover::Redis::Connector
else
c[:slave_host] = redis_slave_host
c[:slave_port] = redis_slave_port
c[:connector] = DiscourseRedis::Connector
end
c[:replica_host] = redis_slave_host
c[:replica_port] = redis_slave_port
c[:connector] = RailsFailover::Redis::Connector
end
c[:password] = redis_password if redis_password.present?
@ -200,15 +188,9 @@ class GlobalSetting
c[:port] = message_bus_redis_port if message_bus_redis_port
if message_bus_redis_slave_host && message_bus_redis_slave_port
if ENV["REDIS_RAILS_FAILOVER"]
c[:replica_host] = message_bus_redis_slave_host
c[:replica_port] = message_bus_redis_slave_port
c[:connector] = RailsFailover::Redis::Connector
else
c[:slave_host] = message_bus_redis_slave_host
c[:slave_port] = message_bus_redis_slave_port
c[:connector] = DiscourseRedis::Connector
end
c[:replica_host] = message_bus_redis_slave_host
c[:replica_port] = message_bus_redis_slave_port
c[:connector] = RailsFailover::Redis::Connector
end
c[:password] = message_bus_redis_password if message_bus_redis_password.present?

View File

@ -19,6 +19,8 @@ require 'action_controller/railtie'
require 'action_view/railtie'
require 'action_mailer/railtie'
require 'sprockets/railtie'
require 'rails_failover/active_record'
require 'rails_failover/redis'
# Plugin related stuff
require_relative '../lib/plugin_initialization_guard'
@ -27,14 +29,6 @@ require_relative '../lib/discourse_plugin_registry'
require_relative '../lib/plugin_gem'
if ENV['ACTIVE_RECORD_RAILS_FAILOVER']
require 'rails_failover/active_record'
end
if ENV['REDIS_RAILS_FAILOVER']
require 'rails_failover/redis'
end
# Global config
require_relative '../app/models/global_setting'
GlobalSetting.configure!

View File

@ -1,81 +1,51 @@
# frozen_string_literal: true
if ENV["REDIS_RAILS_FAILOVER"]
message_bus_keepalive_interval = nil
message_bus_keepalive_interval = nil
RailsFailover::Redis.on_failover do
message_bus_keepalive_interval = MessageBus.keepalive_interval
MessageBus.keepalive_interval = -1 # Disable MessageBus keepalive_interval
Discourse.received_redis_readonly!
end
RailsFailover::Redis.on_failover do
message_bus_keepalive_interval = MessageBus.keepalive_interval
MessageBus.keepalive_interval = -1 # Disable MessageBus keepalive_interval
Discourse.received_redis_readonly!
end
RailsFailover::Redis.on_fallback do
Discourse.clear_redis_readonly!
Discourse.request_refresh!
MessageBus.keepalive_interval = message_bus_keepalive_interval
RailsFailover::Redis.on_fallback do
Discourse.clear_redis_readonly!
Discourse.request_refresh!
MessageBus.keepalive_interval = message_bus_keepalive_interval
end
if Rails.configuration.multisite
if ActiveRecord::Base.current_role == ActiveRecord::Base.reading_role
RailsMultisite::ConnectionManagement.default_connection_handler =
ActiveRecord::Base.connection_handlers[ActiveRecord::Base.reading_role]
end
end
if ENV["ACTIVE_RECORD_RAILS_FAILOVER"]
RailsFailover::ActiveRecord.on_failover do
RailsMultisite::ConnectionManagement.each_connection do
Discourse.enable_readonly_mode(Discourse::PG_READONLY_MODE_KEY)
Sidekiq.pause!("pg_failover") if !Sidekiq.paused?
end
end
RailsFailover::ActiveRecord.on_fallback do
RailsMultisite::ConnectionManagement.each_connection do
Discourse.disable_readonly_mode(Discourse::PG_READONLY_MODE_KEY)
Sidekiq.unpause! if Sidekiq.paused?
end
if Rails.configuration.multisite
if ActiveRecord::Base.current_role == ActiveRecord::Base.reading_role
RailsMultisite::ConnectionManagement.default_connection_handler =
ActiveRecord::Base.connection_handlers[ActiveRecord::Base.reading_role]
end
end
RailsFailover::ActiveRecord.on_failover do
RailsMultisite::ConnectionManagement.each_connection do
Discourse.enable_readonly_mode(Discourse::PG_READONLY_MODE_KEY)
Sidekiq.pause!("pg_failover") if !Sidekiq.paused?
end
rescue => e
Rails.logger.warn "#{e.class} #{e.message}: #{e.backtrace.join("\n")}"
false
end
RailsFailover::ActiveRecord.on_fallback do
RailsMultisite::ConnectionManagement.each_connection do
Discourse.disable_readonly_mode(Discourse::PG_READONLY_MODE_KEY)
Sidekiq.unpause! if Sidekiq.paused?
end
if Rails.configuration.multisite
RailsMultisite::ConnectionManagement.default_connection_handler =
ActiveRecord::Base.connection_handlers[ActiveRecord::Base.writing_role]
end
rescue => e
Rails.logger.warn "#{e.class} #{e.message}: #{e.backtrace.join("\n")}"
false
end
module Discourse
PG_FORCE_READONLY_MODE_KEY ||= 'readonly_mode:postgres_force'
READONLY_KEYS.push(PG_FORCE_READONLY_MODE_KEY)
def self.enable_pg_force_readonly_mode
Discourse.redis.set(PG_FORCE_READONLY_MODE_KEY, 1)
Sidekiq.pause!("pg_failover") if !Sidekiq.paused?
MessageBus.publish(readonly_channel, true)
true
end
def self.disable_pg_force_readonly_mode
result = Discourse.redis.del(PG_FORCE_READONLY_MODE_KEY)
Sidekiq.unpause!
MessageBus.publish(readonly_channel, false)
result > 0
end
end
RailsFailover::ActiveRecord.register_force_reading_role_callback do
Discourse.redis.exists?(
Discourse::PG_READONLY_MODE_KEY,
Discourse::PG_FORCE_READONLY_MODE_KEY
)
rescue => e
Rails.logger.warn "#{e.class} #{e.message}: #{e.backtrace.join("\n")}"
false
RailsMultisite::ConnectionManagement.default_connection_handler =
ActiveRecord::Base.connection_handlers[ActiveRecord::Base.writing_role]
end
end
RailsFailover::ActiveRecord.register_force_reading_role_callback do
Discourse.redis.exists?(
Discourse::PG_READONLY_MODE_KEY,
Discourse::PG_FORCE_READONLY_MODE_KEY
)
rescue => e
Rails.logger.warn "#{e.class} #{e.message}: #{e.backtrace.join("\n")}"
false
end

View File

@ -24,12 +24,7 @@ if Rails.configuration.multisite
# Multisite needs to be first, because the request tracker and message bus rely on it
Rails.configuration.middleware.unshift RailsMultisite::Middleware, RailsMultisite::DiscoursePatches.config
Rails.configuration.middleware.delete ActionDispatch::Executor
end
if ENV["ACTIVE_RECORD_RAILS_FAILOVER"]
if Rails.configuration.multisite
Rails.configuration.middleware.insert_after(RailsMultisite::Middleware, RailsFailover::ActiveRecord::Middleware)
else
Rails.configuration.middleware.insert_before(MessageBus::Rack::Middleware, RailsFailover::ActiveRecord::Middleware)
end
Rails.configuration.middleware.insert_after(RailsMultisite::Middleware, RailsFailover::ActiveRecord::Middleware)
else
Rails.configuration.middleware.insert_before(MessageBus::Rack::Middleware, RailsFailover::ActiveRecord::Middleware)
end

View File

@ -1,190 +0,0 @@
# frozen_string_literal: true
require 'active_record/connection_adapters/abstract_adapter'
require 'active_record/connection_adapters/postgresql_adapter'
require 'discourse'
require 'sidekiq/pausable'
class PostgreSQLFallbackHandler
include Singleton
attr_reader :masters_down
attr_accessor :initialized
DATABASE_DOWN_CHANNEL = '/global/database_down'
def initialize
@masters_down = DistributedCache.new('masters_down', namespace: false)
@mutex = Mutex.new
@initialized = false
MessageBus.subscribe(DATABASE_DOWN_CHANNEL) do |payload|
if @initialized && payload.data["pid"].to_i != Process.pid
begin
RailsMultisite::ConnectionManagement.with_connection(payload.data['db']) do
clear_connections
end
rescue PG::UnableToSend
# Site has already failed over
end
end
end
end
def verify_master
synchronize do
return if @thread && @thread.alive?
@thread = Thread.new do
while true do
thread = Thread.new { initiate_fallback_to_master }
thread.abort_on_exception = true
thread.join
break if synchronize { @masters_down.hash.empty? }
sleep 5
end
end
@thread.abort_on_exception = true
end
end
def master_down?
synchronize { @masters_down[namespace] }
end
def master_down
synchronize do
@masters_down[namespace] = true
Sidekiq.pause!("pg_failover") if !Sidekiq.paused?
MessageBus.publish(DATABASE_DOWN_CHANNEL, db: namespace, pid: Process.pid)
end
end
def master_up(namespace)
synchronize { @masters_down.delete(namespace, publish: false) }
end
def initiate_fallback_to_master
begin
unless @initialized
@initialized = true
return
end
@masters_down.hash.keys.each do |key|
RailsMultisite::ConnectionManagement.with_connection(key) do
begin
logger.warn "#{log_prefix}: Checking master server..."
is_connection_active = false
begin
connection = ActiveRecord::Base.postgresql_connection(config)
is_connection_active = connection.active?
ensure
connection.disconnect! if connection
end
if is_connection_active
logger.warn "#{log_prefix}: Master server is active. Reconnecting..."
self.master_up(key)
clear_connections
disable_readonly_mode
Sidekiq.unpause!
end
rescue => e
logger.warn "#{log_prefix}: Connection to master PostgreSQL server failed with '#{e.message}'"
end
end
end
rescue => e
logger.warn "#{e.class} #{e.message}: #{e.backtrace.join("\n")}"
end
end
# Use for testing
def setup!
@masters_down.clear
disable_readonly_mode
end
def clear_connections
ActiveRecord::Base.clear_all_connections!
end
private
def disable_readonly_mode
Discourse.disable_readonly_mode(Discourse::PG_READONLY_MODE_KEY)
end
def config
ActiveRecord::Base.connection_config
end
def logger
Rails.logger
end
def log_prefix
"#{self.class} [#{namespace}]"
end
def namespace
RailsMultisite::ConnectionManagement.current_db
end
def synchronize
@mutex.synchronize { yield }
end
end
module ActiveRecord
module ConnectionHandling
def postgresql_fallback_connection(config)
return postgresql_connection(config) if ARGV.include?("db:migrate")
fallback_handler = ::PostgreSQLFallbackHandler.instance
config = config.symbolize_keys
if fallback_handler.master_down?
Discourse.enable_readonly_mode(Discourse::PG_READONLY_MODE_KEY)
fallback_handler.verify_master
connection = replica_postgresql_connection(config)
else
begin
connection = postgresql_connection(config)
fallback_handler.initialized ||= true
rescue ::ActiveRecord::NoDatabaseError, PG::ConnectionBad => e
fallback_handler.master_down
fallback_handler.verify_master
if !fallback_handler.initialized
return postgresql_fallback_connection(config)
else
raise e
end
end
end
connection
end
def replica_postgresql_connection(config)
config = config.dup.merge(
host: config[:replica_host],
port: config[:replica_port]
)
connection = postgresql_connection(config)
verify_replica(connection)
connection
end
private
def verify_replica(connection)
value = connection.exec_query("SELECT pg_is_in_recovery()").rows[0][0]
raise "Replica database server is not in recovery mode." if !value
end
end
end

View File

@ -428,19 +428,21 @@ module Discourse
alias_method :base_url_no_path, :base_url_no_prefix
end
READONLY_MODE_KEY_TTL ||= 60
READONLY_MODE_KEY ||= 'readonly_mode'
PG_READONLY_MODE_KEY ||= 'readonly_mode:postgres'
USER_READONLY_MODE_KEY ||= 'readonly_mode:user'
READONLY_MODE_KEY_TTL ||= 60
READONLY_MODE_KEY ||= 'readonly_mode'
PG_READONLY_MODE_KEY ||= 'readonly_mode:postgres'
USER_READONLY_MODE_KEY ||= 'readonly_mode:user'
PG_FORCE_READONLY_MODE_KEY ||= 'readonly_mode:postgres_force'
READONLY_KEYS ||= [
READONLY_MODE_KEY,
PG_READONLY_MODE_KEY,
USER_READONLY_MODE_KEY
USER_READONLY_MODE_KEY,
PG_FORCE_READONLY_MODE_KEY
]
def self.enable_readonly_mode(key = READONLY_MODE_KEY)
if key == USER_READONLY_MODE_KEY
if key == USER_READONLY_MODE_KEY || key == PG_FORCE_READONLY_MODE_KEY
Discourse.redis.set(key, 1)
else
Discourse.redis.setex(key, READONLY_MODE_KEY_TTL, 1)
@ -486,6 +488,24 @@ module Discourse
true
end
def self.enable_pg_force_readonly_mode
RailsMultisite::ConnectionManagement.each_connection do
enable_readonly_mode(PG_FORCE_READONLY_MODE_KEY)
Sidekiq.pause!("pg_failover") if !Sidekiq.paused?
end
true
end
def self.disable_pg_force_readonly_mode
RailsMultisite::ConnectionManagement.each_connection do
disable_readonly_mode(PG_FORCE_READONLY_MODE_KEY)
Sidekiq.unpause!
end
true
end
def self.readonly_mode?(keys = READONLY_KEYS)
recently_readonly? || Discourse.redis.exists?(*keys)
end

View File

@ -5,141 +5,6 @@
#
class DiscourseRedis
class FallbackHandler
include Singleton
MASTER_ROLE_STATUS = "role:master"
MASTER_LOADING_STATUS = "loading:1"
MASTER_LOADED_STATUS = "loading:0"
CONNECTION_TYPES = %w{normal pubsub}
def initialize
@master = true
@running = false
@mutex = Mutex.new
@slave_config = DiscourseRedis.slave_config
@message_bus_keepalive_interval = MessageBus.keepalive_interval
end
def verify_master
synchronize do
return if @thread && @thread.alive?
@thread = Thread.new do
loop do
begin
thread = Thread.new { initiate_fallback_to_master }
thread.join
break if synchronize { @master }
sleep 5
ensure
thread.kill
end
end
end
end
end
def initiate_fallback_to_master
success = false
begin
redis_config = DiscourseRedis.config.dup
redis_config.delete(:connector)
master_client = ::Redis::Client.new(redis_config)
logger.warn "#{log_prefix}: Checking connection to master server..."
info = master_client.call([:info])
if info.include?(MASTER_LOADED_STATUS) && info.include?(MASTER_ROLE_STATUS)
begin
logger.warn "#{log_prefix}: Master server is active, killing all connections to slave..."
self.master = true
slave_client = ::Redis::Client.new(@slave_config)
CONNECTION_TYPES.each do |connection_type|
slave_client.call([:client, [:kill, 'type', connection_type]])
end
MessageBus.keepalive_interval = @message_bus_keepalive_interval
Discourse.clear_readonly!
Discourse.request_refresh!
success = true
ensure
slave_client&.disconnect
end
end
rescue => e
logger.warn "#{log_prefix}: Connection to Master server failed with '#{e.message}'"
ensure
master_client&.disconnect
end
success
end
def master
synchronize { @master }
end
def master=(args)
synchronize do
@master = args
# Disables MessageBus keepalive when Redis is in readonly mode
MessageBus.keepalive_interval = 0 if !@master
end
end
private
def synchronize
@mutex.synchronize { yield }
end
def logger
Rails.logger
end
def log_prefix
"#{self.class}"
end
end
class Connector < Redis::Client::Connector
def initialize(options)
super(options)
@slave_options = DiscourseRedis.slave_config(options)
@fallback_handler = DiscourseRedis::FallbackHandler.instance
end
def resolve(client = nil)
if !@fallback_handler.master
@fallback_handler.verify_master
return @slave_options
end
begin
options = @options.dup
options.delete(:connector)
client ||= Redis::Client.new(options)
loading = client.call([:info, :persistence]).include?(
DiscourseRedis::FallbackHandler::MASTER_LOADING_STATUS
)
loading ? @slave_options : @options
rescue Redis::ConnectionError, Redis::CannotConnectError, RuntimeError => ex
raise ex if ex.class == RuntimeError && ex.message != "Name or service not known"
@fallback_handler.master = false
@fallback_handler.verify_master
raise ex
ensure
client.disconnect
end
end
end
def self.raw_connection(config = nil)
config ||= self.config
Redis.new(config)
@ -149,20 +14,12 @@ class DiscourseRedis
GlobalSetting.redis_config
end
def self.slave_config(options = config)
options.dup.merge!(host: options[:slave_host], port: options[:slave_port])
end
def initialize(config = nil, namespace: true)
@config = config || DiscourseRedis.config
@redis = DiscourseRedis.raw_connection(@config.dup)
@namespace = namespace
end
def self.fallback_handler
@fallback_handler ||= DiscourseRedis::FallbackHandler.instance
end
def without_namespace
# Only use this if you want to store and fetch data that's shared between sites
@redis
@ -172,10 +29,6 @@ class DiscourseRedis
yield
rescue Redis::CommandError => ex
if ex.message =~ /READONLY/
if !ENV["REDIS_RAILS_FAILOVER"]
fallback_handler.verify_master if !fallback_handler.master
end
Discourse.received_redis_readonly!
nil
else

View File

@ -9,7 +9,6 @@ module ActiveSupport::Dependencies::ZeitwerkIntegration::Inflector
'onpdiff' => 'ONPDiff',
'onceoff' => 'Jobs',
'pop3_polling_enabled_setting_validator' => 'POP3PollingEnabledSettingValidator',
'postgresql_fallback_adapter' => 'PostgreSQLFallbackHandler',
'regular' => 'Jobs',
'scheduled' => 'Jobs',
'topic_query_sql' => 'TopicQuerySQL',

View File

@ -86,7 +86,7 @@ describe GlobalSetting do
GlobalSetting.expects(:redis_slave_port).returns(6379).at_least_once
GlobalSetting.expects(:redis_slave_host).returns('0.0.0.0').at_least_once
expect(GlobalSetting.redis_config[:connector]).to eq(DiscourseRedis::Connector)
expect(GlobalSetting.redis_config[:connector]).to eq(RailsFailover::Redis::Connector)
end
end
end