diff --git a/lib/utils/dns_srv.rb b/lib/utils/dns_srv.rb index ddc4038d..8587ffac 100644 --- a/lib/utils/dns_srv.rb +++ b/lib/utils/dns_srv.rb @@ -11,7 +11,7 @@ module DiscourseAi .fetch("dns_srv_lookup:#{domain}", expires_in: 5.minutes) do resources = dns_srv_lookup_for_domain(domain) - select_server(resources) + server_election(resources) end end @@ -36,9 +36,29 @@ module DiscourseAi return resource if random_weight < 0 end + end - # fallback - resources.first + def self.server_available?(server) + begin + conn = Faraday.new { |f| f.adapter FinalDestination::FaradayAdapter } + conn.head("https://#{server.target}:#{server.port}") + true + rescue StandardError + false + end + end + + def self.server_election(resources) + return nil if resources.empty? + return resources.first if resources.length == 1 + + candidate = select_server(resources) + + if server_available?(candidate) + candidate + else + server_election(resources - [candidate]) + end end end end diff --git a/spec/lib/utils/dns_srv_spec.rb b/spec/lib/utils/dns_srv_spec.rb index 33a2115a..b54e8ed6 100644 --- a/spec/lib/utils/dns_srv_spec.rb +++ b/spec/lib/utils/dns_srv_spec.rb @@ -5,10 +5,10 @@ describe DiscourseAi::Utils::DnsSrv do let(:weighted_dns_results) do [ Resolv::DNS::Resource::IN::SRV.new(1, 1, 443, "service1.example.com"), - Resolv::DNS::Resource::IN::SRV.new(1, 2, 443, "service2.example.com"), - Resolv::DNS::Resource::IN::SRV.new(1, 2, 443, "service3.example.com"), - Resolv::DNS::Resource::IN::SRV.new(2, 1, 443, "service4.example.com"), - Resolv::DNS::Resource::IN::SRV.new(2, 1, 443, "service5.example.com"), + Resolv::DNS::Resource::IN::SRV.new(2, 2, 443, "service2.example.com"), + Resolv::DNS::Resource::IN::SRV.new(2, 2, 443, "service3.example.com"), + Resolv::DNS::Resource::IN::SRV.new(3, 1, 443, "service4.example.com"), + Resolv::DNS::Resource::IN::SRV.new(3, 1, 443, "service5.example.com"), ] end @@ -17,6 +17,10 @@ describe DiscourseAi::Utils::DnsSrv do Resolv::DNS.any_instance.stubs(:getresources).returns(weighted_dns_results) Discourse.cache.delete("dns_srv_lookup:#{domain}") + + (1..5).each do |i| + stub_request(:head, "https://service#{i}.example.com/").to_return(status: 200) + end end it "picks a server" do @@ -31,4 +35,52 @@ describe DiscourseAi::Utils::DnsSrv do expect(weighted_dns_results.filter { |r| r.priority == 1 }).to include(selected_server) end end + + context "when the first server is not available" do + before do + Resolv::DNS.any_instance.stubs(:getresources).returns(weighted_dns_results) + + Discourse.cache.delete("dns_srv_lookup:#{domain}") + + stub_request(:head, "https://service1.example.com/").to_raise(Faraday::SSLError) + + (2..5).each do |i| + stub_request(:head, "https://service#{i}.example.com/").to_return(status: 200) + end + end + + it "picks another server" do + selected_server = DiscourseAi::Utils::DnsSrv.lookup(domain) + + expect(weighted_dns_results).to include(selected_server) + expect(selected_server.port).to eq(443) + expect(selected_server.target.to_s).to_not eq("service1.example.com") + end + end + + context "when the multiple servers are not available" do + before do + Resolv::DNS.any_instance.stubs(:getresources).returns(weighted_dns_results) + + Discourse.cache.delete("dns_srv_lookup:#{domain}") + + (1..3).each do |i| + stub_request(:head, "https://service#{i}.example.com/").to_raise(Faraday::SSLError) + end + + (4..5).each do |i| + stub_request(:head, "https://service#{i}.example.com/").to_return(status: 200) + end + end + + it "picks another server" do + selected_server = DiscourseAi::Utils::DnsSrv.lookup(domain) + + expect(weighted_dns_results).to include(selected_server) + expect(selected_server.port).to eq(443) + expect(selected_server.target.to_s).to_not eq("service1.example.com") + expect(selected_server.target.to_s).to_not eq("service2.example.com") + expect(selected_server.target.to_s).to_not eq("service3.example.com") + end + end end