Merge pull request #4546 from tgxworld/fix_postgresql_failover
Fix postgresql failover
This commit is contained in:
commit
b8441fba27
|
@ -87,5 +87,6 @@ RailsMultisite::ConnectionManagement.each_connection do
|
|||
end
|
||||
|
||||
if Rails.configuration.multisite
|
||||
Rails.logger.instance_variable_get(:@chained).first.formatter = RailsMultisite::Formatter.new
|
||||
chained = Rails.logger.instance_variable_get(:@chained)
|
||||
chained && chained.first.formatter = RailsMultisite::Formatter.new
|
||||
end
|
||||
|
|
|
@ -6,23 +6,46 @@ class PostgreSQLFallbackHandler
|
|||
include Singleton
|
||||
|
||||
def initialize
|
||||
@master = {}
|
||||
@running = {}
|
||||
@mutex = {}
|
||||
@last_check = {}
|
||||
|
||||
setup!
|
||||
@masters_down = {}
|
||||
@mutex = Mutex.new
|
||||
end
|
||||
|
||||
def verify_master
|
||||
@mutex[namespace].synchronize do
|
||||
return if running || recently_checked?
|
||||
@running[namespace] = true
|
||||
end
|
||||
synchronize { return if @thread && @thread.alive? }
|
||||
|
||||
current_namespace = namespace
|
||||
Thread.new do
|
||||
RailsMultisite::ConnectionManagement.with_connection(current_namespace) do
|
||||
@thread = Thread.new do
|
||||
while true do
|
||||
begin
|
||||
thread = Thread.new { initiate_fallback_to_master }
|
||||
thread.join
|
||||
break if synchronize { @masters_down.empty? }
|
||||
sleep 10
|
||||
ensure
|
||||
thread.kill
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def master_down?
|
||||
synchronize { @masters_down[namespace] }
|
||||
end
|
||||
|
||||
def master_down=(args)
|
||||
synchronize { @masters_down[namespace] = args }
|
||||
end
|
||||
|
||||
def master_up(namespace)
|
||||
synchronize { @masters_down.delete(namespace) }
|
||||
end
|
||||
|
||||
def running?
|
||||
synchronize { @thread.alive? }
|
||||
end
|
||||
|
||||
def initiate_fallback_to_master
|
||||
@masters_down.keys.each do |key|
|
||||
RailsMultisite::ConnectionManagement.with_connection(key) do
|
||||
begin
|
||||
logger.warn "#{log_prefix}: Checking master server..."
|
||||
connection = ActiveRecord::Base.postgresql_connection(config)
|
||||
|
@ -32,54 +55,20 @@ class PostgreSQLFallbackHandler
|
|||
ActiveRecord::Base.clear_all_connections!
|
||||
logger.warn "#{log_prefix}: Master server is active. Reconnecting..."
|
||||
|
||||
if namespace == RailsMultisite::ConnectionManagement::DEFAULT
|
||||
ActiveRecord::Base.establish_connection(config)
|
||||
else
|
||||
RailsMultisite::ConnectionManagement.establish_connection(db: namespace)
|
||||
end
|
||||
|
||||
self.master_up(key)
|
||||
Discourse.disable_readonly_mode
|
||||
self.master = true
|
||||
end
|
||||
rescue => e
|
||||
if e.message.include?("could not connect to server")
|
||||
logger.warn "#{log_prefix}: Connection to master PostgreSQL server failed with '#{e.message}'"
|
||||
else
|
||||
raise e
|
||||
end
|
||||
ensure
|
||||
@mutex[namespace].synchronize do
|
||||
@last_check[namespace] = Time.zone.now
|
||||
@running[namespace] = false
|
||||
end
|
||||
byebug
|
||||
logger.warn "#{log_prefix}: Connection to master PostgreSQL server failed with '#{e.message}'"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def master
|
||||
@master[namespace]
|
||||
end
|
||||
|
||||
def master=(args)
|
||||
@master[namespace] = args
|
||||
end
|
||||
|
||||
def running
|
||||
@running[namespace]
|
||||
end
|
||||
|
||||
# Use for testing
|
||||
def setup!
|
||||
RailsMultisite::ConnectionManagement.all_dbs.each do |db|
|
||||
@master[db] = true
|
||||
@running[db] = false
|
||||
@mutex[db] = Mutex.new
|
||||
@last_check[db] = nil
|
||||
end
|
||||
end
|
||||
|
||||
def verify?
|
||||
!master && !running
|
||||
@masters_down = {}
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -96,17 +85,13 @@ class PostgreSQLFallbackHandler
|
|||
"#{self.class} [#{namespace}]"
|
||||
end
|
||||
|
||||
def recently_checked?
|
||||
if @last_check[namespace]
|
||||
Time.zone.now <= (@last_check[namespace] + 5.seconds)
|
||||
else
|
||||
false
|
||||
end
|
||||
end
|
||||
|
||||
def namespace
|
||||
RailsMultisite::ConnectionManagement.current_db
|
||||
end
|
||||
|
||||
def synchronize
|
||||
@mutex.synchronize { yield }
|
||||
end
|
||||
end
|
||||
|
||||
module ActiveRecord
|
||||
|
@ -115,7 +100,7 @@ module ActiveRecord
|
|||
fallback_handler = ::PostgreSQLFallbackHandler.instance
|
||||
config = config.symbolize_keys
|
||||
|
||||
if fallback_handler.verify?
|
||||
if fallback_handler.master_down?
|
||||
connection = postgresql_connection(config.dup.merge({
|
||||
host: config[:replica_host], port: config[:replica_port]
|
||||
}))
|
||||
|
@ -126,7 +111,8 @@ module ActiveRecord
|
|||
begin
|
||||
connection = postgresql_connection(config)
|
||||
rescue PG::ConnectionBad => e
|
||||
fallback_handler.master = false
|
||||
fallback_handler.master_down = true
|
||||
fallback_handler.verify_master
|
||||
raise e
|
||||
end
|
||||
end
|
||||
|
@ -141,20 +127,4 @@ module ActiveRecord
|
|||
raise "Replica database server is not in recovery mode." if value == 'f'
|
||||
end
|
||||
end
|
||||
|
||||
module ConnectionAdapters
|
||||
class PostgreSQLAdapter
|
||||
set_callback :checkout, :before, :switch_back?
|
||||
|
||||
private
|
||||
|
||||
def fallback_handler
|
||||
@fallback_handler ||= ::PostgreSQLFallbackHandler.instance
|
||||
end
|
||||
|
||||
def switch_back?
|
||||
fallback_handler.verify_master if fallback_handler.verify?
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -228,10 +228,12 @@ module Discourse
|
|||
|
||||
def self.keep_readonly_mode
|
||||
# extend the expiry by 1 minute every 30 seconds
|
||||
Thread.new do
|
||||
while readonly_mode?
|
||||
$redis.expire(READONLY_MODE_KEY, READONLY_MODE_KEY_TTL)
|
||||
sleep 30.seconds
|
||||
unless Rails.env.test?
|
||||
Thread.new do
|
||||
while readonly_mode?
|
||||
$redis.expire(READONLY_MODE_KEY, READONLY_MODE_KEY_TTL)
|
||||
sleep 30.seconds
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -48,8 +48,9 @@ describe ActiveRecord::ConnectionHandling do
|
|||
end
|
||||
|
||||
it 'should failover to a replica server' do
|
||||
current_threads = Thread.list
|
||||
|
||||
RailsMultisite::ConnectionManagement.stubs(:all_dbs).returns(['default', multisite_db])
|
||||
::PostgreSQLFallbackHandler.instance.setup!
|
||||
|
||||
[config, multisite_config].each do |configuration|
|
||||
ActiveRecord::Base.expects(:postgresql_connection).with(configuration).raises(PG::ConnectionBad)
|
||||
|
@ -60,7 +61,7 @@ describe ActiveRecord::ConnectionHandling do
|
|||
})).returns(@replica_connection)
|
||||
end
|
||||
|
||||
expect(postgresql_fallback_handler.master).to eq(true)
|
||||
expect(postgresql_fallback_handler.master_down?).to eq(nil)
|
||||
|
||||
expect { ActiveRecord::Base.postgresql_fallback_connection(config) }
|
||||
.to raise_error(PG::ConnectionBad)
|
||||
|
@ -68,10 +69,10 @@ describe ActiveRecord::ConnectionHandling do
|
|||
expect{ ActiveRecord::Base.postgresql_fallback_connection(config) }
|
||||
.to change{ Discourse.readonly_mode? }.from(false).to(true)
|
||||
|
||||
expect(postgresql_fallback_handler.master).to eq(false)
|
||||
expect(postgresql_fallback_handler.master_down?).to eq(true)
|
||||
|
||||
with_multisite_db(multisite_db) do
|
||||
expect(postgresql_fallback_handler.master).to eq(true)
|
||||
expect(postgresql_fallback_handler.master_down?).to eq(nil)
|
||||
|
||||
expect { ActiveRecord::Base.postgresql_fallback_connection(multisite_config) }
|
||||
.to raise_error(PG::ConnectionBad)
|
||||
|
@ -79,30 +80,18 @@ describe ActiveRecord::ConnectionHandling do
|
|||
expect{ ActiveRecord::Base.postgresql_fallback_connection(multisite_config) }
|
||||
.to change{ Discourse.readonly_mode? }.from(false).to(true)
|
||||
|
||||
expect(postgresql_fallback_handler.master).to eq(false)
|
||||
expect(postgresql_fallback_handler.master_down?).to eq(true)
|
||||
end
|
||||
|
||||
postgresql_fallback_handler.master_up(multisite_db)
|
||||
|
||||
ActiveRecord::Base.unstub(:postgresql_connection)
|
||||
|
||||
current_threads = Thread.list
|
||||
|
||||
expect{ ActiveRecord::Base.connection_pool.checkout }
|
||||
.to change{ Thread.list.size }.by(1)
|
||||
|
||||
# Ensure that we don't try to connect back to the replica when a thread
|
||||
# is running
|
||||
begin
|
||||
ActiveRecord::Base.postgresql_fallback_connection(config)
|
||||
rescue PG::ConnectionBad => e
|
||||
# This is expected if the thread finishes before the above is called.
|
||||
end
|
||||
|
||||
# Wait for the thread to finish execution
|
||||
(Thread.list - current_threads).each(&:join)
|
||||
postgresql_fallback_handler.initiate_fallback_to_master
|
||||
|
||||
expect(Discourse.readonly_mode?).to eq(false)
|
||||
|
||||
expect(PostgreSQLFallbackHandler.instance.master).to eq(true)
|
||||
expect(postgresql_fallback_handler.master_down?).to eq(nil)
|
||||
|
||||
expect(ActiveRecord::Base.connection_pool.connections.count).to eq(0)
|
||||
|
||||
|
|
Loading…
Reference in New Issue