Merge pull request #4546 from tgxworld/fix_postgresql_failover

Fix postgresql failover
This commit is contained in:
Guo Xiang Tan 2016-11-30 09:36:52 +01:00 committed by GitHub
commit b8441fba27
4 changed files with 66 additions and 104 deletions

View File

@ -87,5 +87,6 @@ RailsMultisite::ConnectionManagement.each_connection do
end
if Rails.configuration.multisite
Rails.logger.instance_variable_get(:@chained).first.formatter = RailsMultisite::Formatter.new
chained = Rails.logger.instance_variable_get(:@chained)
chained && chained.first.formatter = RailsMultisite::Formatter.new
end

View File

@ -6,23 +6,46 @@ class PostgreSQLFallbackHandler
include Singleton
def initialize
@master = {}
@running = {}
@mutex = {}
@last_check = {}
setup!
@masters_down = {}
@mutex = Mutex.new
end
def verify_master
@mutex[namespace].synchronize do
return if running || recently_checked?
@running[namespace] = true
end
synchronize { return if @thread && @thread.alive? }
current_namespace = namespace
Thread.new do
RailsMultisite::ConnectionManagement.with_connection(current_namespace) do
@thread = Thread.new do
while true do
begin
thread = Thread.new { initiate_fallback_to_master }
thread.join
break if synchronize { @masters_down.empty? }
sleep 10
ensure
thread.kill
end
end
end
end
def master_down?
synchronize { @masters_down[namespace] }
end
def master_down=(args)
synchronize { @masters_down[namespace] = args }
end
def master_up(namespace)
synchronize { @masters_down.delete(namespace) }
end
def running?
synchronize { @thread.alive? }
end
def initiate_fallback_to_master
@masters_down.keys.each do |key|
RailsMultisite::ConnectionManagement.with_connection(key) do
begin
logger.warn "#{log_prefix}: Checking master server..."
connection = ActiveRecord::Base.postgresql_connection(config)
@ -32,54 +55,20 @@ class PostgreSQLFallbackHandler
ActiveRecord::Base.clear_all_connections!
logger.warn "#{log_prefix}: Master server is active. Reconnecting..."
if namespace == RailsMultisite::ConnectionManagement::DEFAULT
ActiveRecord::Base.establish_connection(config)
else
RailsMultisite::ConnectionManagement.establish_connection(db: namespace)
end
self.master_up(key)
Discourse.disable_readonly_mode
self.master = true
end
rescue => e
if e.message.include?("could not connect to server")
logger.warn "#{log_prefix}: Connection to master PostgreSQL server failed with '#{e.message}'"
else
raise e
end
ensure
@mutex[namespace].synchronize do
@last_check[namespace] = Time.zone.now
@running[namespace] = false
end
byebug
logger.warn "#{log_prefix}: Connection to master PostgreSQL server failed with '#{e.message}'"
end
end
end
end
def master
@master[namespace]
end
def master=(args)
@master[namespace] = args
end
def running
@running[namespace]
end
# Use for testing
def setup!
RailsMultisite::ConnectionManagement.all_dbs.each do |db|
@master[db] = true
@running[db] = false
@mutex[db] = Mutex.new
@last_check[db] = nil
end
end
def verify?
!master && !running
@masters_down = {}
end
private
@ -96,17 +85,13 @@ class PostgreSQLFallbackHandler
"#{self.class} [#{namespace}]"
end
def recently_checked?
if @last_check[namespace]
Time.zone.now <= (@last_check[namespace] + 5.seconds)
else
false
end
end
def namespace
RailsMultisite::ConnectionManagement.current_db
end
def synchronize
@mutex.synchronize { yield }
end
end
module ActiveRecord
@ -115,7 +100,7 @@ module ActiveRecord
fallback_handler = ::PostgreSQLFallbackHandler.instance
config = config.symbolize_keys
if fallback_handler.verify?
if fallback_handler.master_down?
connection = postgresql_connection(config.dup.merge({
host: config[:replica_host], port: config[:replica_port]
}))
@ -126,7 +111,8 @@ module ActiveRecord
begin
connection = postgresql_connection(config)
rescue PG::ConnectionBad => e
fallback_handler.master = false
fallback_handler.master_down = true
fallback_handler.verify_master
raise e
end
end
@ -141,20 +127,4 @@ module ActiveRecord
raise "Replica database server is not in recovery mode." if value == 'f'
end
end
module ConnectionAdapters
class PostgreSQLAdapter
set_callback :checkout, :before, :switch_back?
private
def fallback_handler
@fallback_handler ||= ::PostgreSQLFallbackHandler.instance
end
def switch_back?
fallback_handler.verify_master if fallback_handler.verify?
end
end
end
end

View File

@ -228,10 +228,12 @@ module Discourse
def self.keep_readonly_mode
# extend the expiry by 1 minute every 30 seconds
Thread.new do
while readonly_mode?
$redis.expire(READONLY_MODE_KEY, READONLY_MODE_KEY_TTL)
sleep 30.seconds
unless Rails.env.test?
Thread.new do
while readonly_mode?
$redis.expire(READONLY_MODE_KEY, READONLY_MODE_KEY_TTL)
sleep 30.seconds
end
end
end
end

View File

@ -48,8 +48,9 @@ describe ActiveRecord::ConnectionHandling do
end
it 'should failover to a replica server' do
current_threads = Thread.list
RailsMultisite::ConnectionManagement.stubs(:all_dbs).returns(['default', multisite_db])
::PostgreSQLFallbackHandler.instance.setup!
[config, multisite_config].each do |configuration|
ActiveRecord::Base.expects(:postgresql_connection).with(configuration).raises(PG::ConnectionBad)
@ -60,7 +61,7 @@ describe ActiveRecord::ConnectionHandling do
})).returns(@replica_connection)
end
expect(postgresql_fallback_handler.master).to eq(true)
expect(postgresql_fallback_handler.master_down?).to eq(nil)
expect { ActiveRecord::Base.postgresql_fallback_connection(config) }
.to raise_error(PG::ConnectionBad)
@ -68,10 +69,10 @@ describe ActiveRecord::ConnectionHandling do
expect{ ActiveRecord::Base.postgresql_fallback_connection(config) }
.to change{ Discourse.readonly_mode? }.from(false).to(true)
expect(postgresql_fallback_handler.master).to eq(false)
expect(postgresql_fallback_handler.master_down?).to eq(true)
with_multisite_db(multisite_db) do
expect(postgresql_fallback_handler.master).to eq(true)
expect(postgresql_fallback_handler.master_down?).to eq(nil)
expect { ActiveRecord::Base.postgresql_fallback_connection(multisite_config) }
.to raise_error(PG::ConnectionBad)
@ -79,30 +80,18 @@ describe ActiveRecord::ConnectionHandling do
expect{ ActiveRecord::Base.postgresql_fallback_connection(multisite_config) }
.to change{ Discourse.readonly_mode? }.from(false).to(true)
expect(postgresql_fallback_handler.master).to eq(false)
expect(postgresql_fallback_handler.master_down?).to eq(true)
end
postgresql_fallback_handler.master_up(multisite_db)
ActiveRecord::Base.unstub(:postgresql_connection)
current_threads = Thread.list
expect{ ActiveRecord::Base.connection_pool.checkout }
.to change{ Thread.list.size }.by(1)
# Ensure that we don't try to connect back to the replica when a thread
# is running
begin
ActiveRecord::Base.postgresql_fallback_connection(config)
rescue PG::ConnectionBad => e
# This is expected if the thread finishes before the above is called.
end
# Wait for the thread to finish execution
(Thread.list - current_threads).each(&:join)
postgresql_fallback_handler.initiate_fallback_to_master
expect(Discourse.readonly_mode?).to eq(false)
expect(PostgreSQLFallbackHandler.instance.master).to eq(true)
expect(postgresql_fallback_handler.master_down?).to eq(nil)
expect(ActiveRecord::Base.connection_pool.connections.count).to eq(0)