From c50d74f2d48683269a1c450ccf3f0d35eb6a7732 Mon Sep 17 00:00:00 2001 From: Will Gant Date: Fri, 4 Nov 2022 18:25:21 +0000 Subject: [PATCH 1/4] Diego client tries all bbs domain IPs Mitigates the risk of 'Runner is unavailable' errors when an outage makes some (but not all) bbs instances unreachable, by trying all IPs before re-resolving. bbs operates as a cluster, with only one instance at a time 'active' and able to respond to API requests. At present, whenever the Diego::Client needs to communicate with bbs it hits the URL passed in from config.diego.bbs.url. The default value passed in for this from the capi-release is https://bbs.service.cf.internal:8889. This domain is defined by cf-deployment as a bosh-dns alias for the diego-api instance group with query q-s4 (i.e. resolving to all instances, whether healthy or unhealthy, in a random order). The diego client previously made three attempts to reach bbs. Upon a network timeout its HTTP client would bail and a retry would be attempted with a fresh DNS resolution. This returned a list of bbs IPs in a random order, and with two instances there was a 1/8 chance to get the same unreachable instance's IP first in the list on all three attempts. --- lib/diego/client.rb | 55 +++++++++----- spec/diego/client_spec.rb | 150 +++++++++++++++++++------------------- 2 files changed, 113 insertions(+), 92 deletions(-) diff --git a/lib/diego/client.rb b/lib/diego/client.rb index 2325db95805..39338af2538 100644 --- a/lib/diego/client.rb +++ b/lib/diego/client.rb @@ -1,6 +1,8 @@ require 'diego/bbs/bbs' require 'diego/errors' require 'diego/routes' +require 'uri' +require 'resolv' module Diego class Client @@ -9,8 +11,8 @@ class Client def initialize(url:, ca_cert_file:, client_cert_file:, client_key_file:, connect_timeout:, send_timeout:, receive_timeout:) ENV['PB_IGNORE_DEPRECATIONS'] ||= 'true' + @bbs_url = url @client = build_client( - url, ca_cert_file, client_cert_file, client_key_file, @@ -21,7 +23,7 @@ def initialize(url:, ca_cert_file:, client_cert_file:, client_key_file:, def ping response = with_request_error_handling do - client.post(Routes::PING) + client.post(URI.join(bbs_url_from_ip, Routes::PING)) end validate_status!(response: response, statuses: [200]) @@ -32,7 +34,7 @@ def upsert_domain(domain:, ttl:) request = protobuf_encode!({ domain: domain, ttl: ttl.to_i }, Bbs::Models::UpsertDomainRequest) response = with_request_error_handling do - client.post(Routes::UPSERT_DOMAIN, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::UPSERT_DOMAIN), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -43,7 +45,7 @@ def desire_task(task_definition:, domain:, task_guid:) request = protobuf_encode!({ task_definition: task_definition, domain: domain, task_guid: task_guid }, Bbs::Models::DesireTaskRequest) response = with_request_error_handling do - client.post(Routes::DESIRE_TASK, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::DESIRE_TASK), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -54,7 +56,7 @@ def task_by_guid(task_guid) request = protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskByGuidRequest) response = with_request_error_handling do - client.post(Routes::TASK_BY_GUID, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::TASK_BY_GUID), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -65,7 +67,7 @@ def tasks(domain: '', cell_id: '') request = protobuf_encode!({ domain: domain, cell_id: cell_id }, Bbs::Models::TasksRequest) response = with_request_error_handling do - client.post(Routes::LIST_TASKS, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::LIST_TASKS), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -76,7 +78,7 @@ def cancel_task(task_guid) request = protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskGuidRequest) response = with_request_error_handling do - client.post(Routes::CANCEL_TASK, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::CANCEL_TASK), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -87,7 +89,7 @@ def desire_lrp(lrp) request = protobuf_encode!({ desired_lrp: lrp }, Bbs::Models::DesireLRPRequest) response = with_request_error_handling do - client.post(Routes::DESIRE_LRP, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::DESIRE_LRP), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -98,7 +100,7 @@ def desired_lrp_by_process_guid(process_guid) request = protobuf_encode!({ process_guid: process_guid }, Bbs::Models::DesiredLRPByProcessGuidRequest) response = with_request_error_handling do - client.post(Routes::DESIRED_LRP_BY_PROCESS_GUID, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::DESIRED_LRP_BY_PROCESS_GUID), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -109,7 +111,7 @@ def update_desired_lrp(process_guid, lrp_update) request = protobuf_encode!({ process_guid: process_guid, update: lrp_update }, Bbs::Models::UpdateDesiredLRPRequest) response = with_request_error_handling do - client.post(Routes::UPDATE_DESIRED_LRP, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::UPDATE_DESIRED_LRP), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -120,7 +122,7 @@ def remove_desired_lrp(process_guid) request = protobuf_encode!({ process_guid: process_guid }, Bbs::Models::RemoveDesiredLRPRequest) response = with_request_error_handling do - client.post(Routes::REMOVE_DESIRED_LRP, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::REMOVE_DESIRED_LRP), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -131,7 +133,7 @@ def retire_actual_lrp(actual_lrp_key) request = protobuf_encode!({ actual_lrp_key: actual_lrp_key }, Bbs::Models::RetireActualLRPRequest) response = with_request_error_handling do - client.post(Routes::RETIRE_ACTUAL_LRP, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::RETIRE_ACTUAL_LRP), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -142,7 +144,7 @@ def desired_lrp_scheduling_infos(domain) request = protobuf_encode!({ domain: domain }, Bbs::Models::DesiredLRPsRequest) response = with_request_error_handling do - client.post(Routes::DESIRED_LRP_SCHEDULING_INFOS, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::DESIRED_LRP_SCHEDULING_INFOS), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -153,7 +155,7 @@ def actual_lrps_by_process_guid(process_guid) request = protobuf_encode!({ process_guid: process_guid }, Bbs::Models::ActualLRPsRequest) response = with_request_error_handling do - client.post(Routes::ACTUAL_LRPS, request, PROTOBUF_HEADER) + client.post(URI.join(bbs_url_from_ip, Routes::ACTUAL_LRPS), request, PROTOBUF_HEADER) end validate_status!(response: response, statuses: [200]) @@ -164,13 +166,20 @@ def with_request_error_handling(&blk) tries ||= 3 yield rescue => e - retry unless (tries -= 1).zero? + ips_remaining.shift + retry unless ips_remaining.empty? && (tries -= 1).zero? raise RequestError.new(e.message) end + def bbs_ip + self.ips_remaining = bbs_ip_addresses if ips_remaining.nil? || ips_remaining.empty? + ips_remaining.first + end + private - attr_reader :client + attr_reader :client, :bbs_url + attr_accessor :ips_remaining def protobuf_encode!(hash, protobuf_message_class) # See below link to understand proto3 message encoding @@ -190,9 +199,19 @@ def protobuf_decode!(message, protobuf_decoder) raise DecodeError.new(e.message) end - def build_client(url, ca_cert_file, client_cert_file, client_key_file, + def bbs_ip_addresses + Resolv.getaddresses(URI(bbs_url).host).dup + end + + def bbs_url_from_ip + uri = URI(bbs_url) + uri.host = bbs_ip + uri.to_s + end + + def build_client(ca_cert_file, client_cert_file, client_key_file, connect_timeout, send_timeout, receive_timeout) - client = HTTPClient.new(base_url: url) + client = HTTPClient.new client.connect_timeout = connect_timeout client.send_timeout = send_timeout client.receive_timeout = receive_timeout diff --git a/spec/diego/client_spec.rb b/spec/diego/client_spec.rb index ffacaad0aa2..5b1909a7f81 100644 --- a/spec/diego/client_spec.rb +++ b/spec/diego/client_spec.rb @@ -3,15 +3,22 @@ module Diego RSpec.describe Client do - let(:bbs_url) { 'https://bbs.example.com:4443' } + let(:bbs_domain) { 'bbs.example.com' } + let(:bbs_port) { '4443' } + let(:bbs_uri) { "https://#{bbs_domain}:#{bbs_port}" } let(:ca_cert_file) { File.join(Paths::FIXTURES, 'certs/bbs_ca.crt') } let(:client_cert_file) { File.join(Paths::FIXTURES, 'certs/bbs_client.crt') } let(:client_key_file) { File.join(Paths::FIXTURES, 'certs/bbs_client.key') } + let(:bbs_ip_1) { '1.2.3.4' } + let(:bbs_ip_2) { '5.6.7.8' } subject(:client) do - Client.new(url: bbs_url, ca_cert_file: ca_cert_file, client_cert_file: client_cert_file, client_key_file: client_key_file, + Client.new(url: bbs_uri, ca_cert_file: ca_cert_file, client_cert_file: client_cert_file, client_key_file: client_key_file, connect_timeout: 10, send_timeout: 10, receive_timeout: 10) end + before do + allow(::Resolv).to receive(:getaddresses).with(bbs_domain).and_return([bbs_ip_1, bbs_ip_2]) + end describe 'configuration' do it "should set ENV['PB_IGNORE_DEPRECATIONS'] to true" do @@ -26,12 +33,12 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/ping').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/ping").to_return(status: response_status, body: response_body) end it 'returns a ping response' do expect(client.ping.available).to be_truthy - expect(a_request(:post, 'https://bbs.example.com:4443/v1/ping')).to have_been_made + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/ping")).to have_been_made end context 'when it does not return successfully' do @@ -45,7 +52,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/ping').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/ping").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/ping").to_raise(StandardError.new('error message')) end it 'raises' do @@ -69,7 +77,7 @@ module Diego let(:ttl) { VCAP::CloudController::Diego::APP_LRP_DOMAIN_TTL } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/domains/upsert').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/domains/upsert").to_return(status: response_status, body: response_body) end it 'returns a domain lifecycle response' do @@ -78,7 +86,7 @@ module Diego response = client.upsert_domain(domain: domain, ttl: ttl) expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/domains/upsert').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/domains/upsert").with( body: Bbs::Models::UpsertDomainRequest.encode(expected_domain_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -95,7 +103,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/domains/upsert').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/domains/upsert").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/domains/upsert").to_raise(StandardError.new('error message')) end it 'raises' do @@ -124,7 +133,7 @@ module Diego let(:task_definition) { Bbs::Models::TaskDefinition.new } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/tasks/desire.r2').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/desire.r2").to_return(status: response_status, body: response_body) end it 'returns a task lifecycle response' do @@ -133,7 +142,7 @@ module Diego response = client.desire_task(task_definition: task_definition, task_guid: 'task_guid', domain: 'domain') expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/tasks/desire.r2').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/desire.r2").with( body: Bbs::Models::DesireTaskRequest.encode(expected_task_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -152,7 +161,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/tasks/desire.r2').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/desire.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/tasks/desire.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -180,7 +190,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/tasks/list.r2').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").to_return(status: response_status, body: response_body) end it 'returns a tasks response' do @@ -189,7 +199,7 @@ module Diego expected_request = Bbs::Models::TasksRequest.new expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/tasks/list.r2').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").with( body: Bbs::Models::TasksRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -202,7 +212,7 @@ module Diego expected_request = Bbs::Models::TasksRequest.new(domain: 'some-domain') expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/tasks/list.r2').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").with( body: Bbs::Models::TasksRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -214,7 +224,7 @@ module Diego expected_request = Bbs::Models::TasksRequest.new(cell_id: 'cell-id-thing') expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/tasks/list.r2').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").with( body: Bbs::Models::TasksRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -232,7 +242,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/tasks/list.r2').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/tasks/list.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -260,7 +271,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/tasks/get_by_task_guid.r2').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/get_by_task_guid.r2").to_return(status: response_status, body: response_body) end it 'returns a task response' do @@ -269,7 +280,7 @@ module Diego expected_request = Bbs::Models::TaskByGuidRequest.new(task_guid: 'some-guid') expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/tasks/get_by_task_guid.r2').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/get_by_task_guid.r2").with( body: Bbs::Models::TaskByGuidRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -286,7 +297,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/tasks/get_by_task_guid.r2').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/get_by_task_guid.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/tasks/get_by_task_guid.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -314,7 +326,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/tasks/cancel').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/cancel").to_return(status: response_status, body: response_body) end it 'returns a task lifecycle response' do @@ -323,7 +335,7 @@ module Diego response = client.cancel_task('some-guid') expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/tasks/cancel').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/cancel").with( body: Bbs::Models::TaskGuidRequest.encode(expected_cancel_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -340,7 +352,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/tasks/cancel').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/cancel").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/tasks/cancel").to_raise(StandardError.new('error message')) end it 'raises' do @@ -369,7 +382,7 @@ module Diego let(:lrp) { ::Diego::Bbs::Models::DesiredLRP.new } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/desire.r2').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/desire.r2").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Lifecycle Response' do @@ -378,7 +391,7 @@ module Diego response = client.desire_lrp(lrp) expect(response).to be_a(Bbs::Models::DesiredLRPLifecycleResponse) expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/desire.r2').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/desire.r2").with( body: Bbs::Models::DesireLRPRequest.encode(expected_desire_lrp_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -395,7 +408,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/desire.r2').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/desire.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrp/desire.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -426,7 +440,7 @@ module Diego let(:process_guid) { 'process-guid' } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrps/get_by_process_guid.r2').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrps/get_by_process_guid.r2").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Response' do @@ -436,7 +450,7 @@ module Diego expect(response).to be_a(Bbs::Models::DesiredLRPResponse) expect(response.error).to be_nil expect(response.desired_lrp).to eq(lrp) - expect(a_request(:post, 'https://bbs.example.com:4443/v1/desired_lrps/get_by_process_guid.r2').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrps/get_by_process_guid.r2").with( body: Bbs::Models::DesiredLRPByProcessGuidRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -455,7 +469,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrps/get_by_process_guid.r2').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrps/get_by_process_guid.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrps/get_by_process_guid.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -484,7 +499,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/remove').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/remove").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Lifecycle Response' do @@ -493,7 +508,7 @@ module Diego response = client.remove_desired_lrp(process_guid) expect(response).to be_a(Bbs::Models::DesiredLRPLifecycleResponse) expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/remove').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/remove").with( body: Bbs::Models::RemoveDesiredLRPRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -510,7 +525,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/remove').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/remove").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrp/remove").to_raise(StandardError.new('error message')) end it 'raises' do @@ -539,7 +555,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/actual_lrps/retire').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/retire").to_return(status: response_status, body: response_body) end it 'returns an Actual LRP Lifecycle Response' do @@ -548,7 +564,7 @@ module Diego response = client.retire_actual_lrp(actual_lrp_key) expect(response).to be_a(Bbs::Models::ActualLRPLifecycleResponse) expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/actual_lrps/retire').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/retire").with( body: Bbs::Models::RetireActualLRPRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -567,7 +583,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/actual_lrps/retire').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/retire").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/actual_lrps/retire").to_raise(StandardError.new('error message')) end it 'raises' do @@ -590,25 +607,6 @@ module Diego end end - describe '#with_request_error_handling' do - it 'retries' do - tries = 0 - - client.with_request_error_handling do - tries += 1 - raise 'error' if tries < 2 - end - - expect(tries).to be > 1 - end - - it 'raises an error after all retries fail' do - expect { - client.with_request_error_handling { raise 'error' } - }.to raise_error(RequestError) - end - end - describe '#update_desired_lrp' do let(:process_guid) { 'process-guid' } let(:lrp_update) { ::Diego::Bbs::Models::DesiredLRPUpdate.new(instances: 3) } @@ -617,7 +615,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/update').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/update").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Lifecycle Response' do @@ -626,7 +624,7 @@ module Diego response = client.update_desired_lrp(process_guid, lrp_update) expect(response).to be_a(Bbs::Models::DesiredLRPLifecycleResponse) expect(response.error).to be_nil - expect(a_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/update').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/update").with( body: Bbs::Models::UpdateDesiredLRPRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -643,7 +641,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp/update').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/update").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrp/update").to_raise(StandardError.new('error message')) end it 'raises' do @@ -677,7 +676,7 @@ module Diego let(:actual_lrps) { [::Diego::Bbs::Models::ActualLRP.new] } let(:response_status) { 200 } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/actual_lrps/list').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/list").to_return(status: response_status, body: response_body) end it 'returns a LRP instances response' do @@ -687,7 +686,7 @@ module Diego expect(response).to be_a(Bbs::Models::ActualLRPsResponse) expect(response.error).to be_nil expect(response.actual_lrps).to eq(actual_lrps) - expect(a_request(:post, 'https://bbs.example.com:4443/v1/actual_lrps/list').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/list").with( body: Bbs::Models::ActualLRPsRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -704,7 +703,7 @@ module Diego let(:domain) { 'domain' } before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp_scheduling_infos/list').to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp_scheduling_infos/list").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Scheduling Infos Response' do @@ -714,7 +713,7 @@ module Diego expect(response).to be_a(Bbs::Models::DesiredLRPSchedulingInfosResponse) expect(response.error).to be_nil expect(response.desired_lrp_scheduling_infos).to eq(scheduling_infos) - expect(a_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp_scheduling_infos/list').with( + expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp_scheduling_infos/list").with( body: Bbs::Models::DesiredLRPsRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } )).to have_been_made @@ -733,7 +732,8 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, 'https://bbs.example.com:4443/v1/desired_lrp_scheduling_infos/list').to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp_scheduling_infos/list").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrp_scheduling_infos/list").to_raise(StandardError.new('error message')) end it 'raises' do @@ -759,22 +759,24 @@ module Diego end describe '#with_request_error_handling' do - it 'retries' do - tries = 0 - - client.with_request_error_handling do - tries += 1 - raise 'error' if tries < 2 + it 'retries three times for each IP that the bbs domain resolves to and then errors' do + (1..3).each do |num_ips| + allow(::Resolv).to receive(:getaddresses).with(bbs_domain).and_return(Array.new(num_ips) { random_ip }) + call_count = 0 + expect { + client.with_request_error_handling do + call_count += 1 + _ = client.bbs_ip # stores IPs an instance variable and re-resolves when its empty + raise 'error' + end + }.to raise_error(RequestError) + expect(call_count).to eq(num_ips * 3) end - - expect(tries).to be > 1 end + end - it 'raises an error after all retries fail' do - expect { - client.with_request_error_handling { raise 'error' } - }.to raise_error(RequestError) - end + def random_ip + Array.new(4) { rand(256) }.join('.') end end end From 1af360419e2040b93fd042ee392715f8ba05ef8b Mon Sep 17 00:00:00 2001 From: Will Gant Date: Mon, 7 Nov 2022 10:04:38 +0000 Subject: [PATCH 2/4] Configure http client to target specific bbs IP --- lib/diego/client.rb | 165 ++++++++++++------------------ spec/diego/client_spec.rb | 180 ++++++++++++++++++--------------- spec/request/processes_spec.rb | 6 +- spec/request/v2/apps_spec.rb | 6 +- spec/request/v2/spaces_spec.rb | 6 +- 5 files changed, 178 insertions(+), 185 deletions(-) diff --git a/lib/diego/client.rb b/lib/diego/client.rb index 39338af2538..c0348e5585a 100644 --- a/lib/diego/client.rb +++ b/lib/diego/client.rb @@ -3,16 +3,15 @@ require 'diego/routes' require 'uri' require 'resolv' +require 'net/http' module Diego class Client - PROTOBUF_HEADER = { 'Content-Type'.freeze => 'application/x-protobuf'.freeze }.freeze - def initialize(url:, ca_cert_file:, client_cert_file:, client_key_file:, connect_timeout:, send_timeout:, receive_timeout:) ENV['PB_IGNORE_DEPRECATIONS'] ||= 'true' - @bbs_url = url - @client = build_client( + @bbs_url = URI(url) + @http_client = new_http_client( ca_cert_file, client_cert_file, client_key_file, @@ -22,149 +21,113 @@ def initialize(url:, ca_cert_file:, client_cert_file:, client_key_file:, end def ping - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::PING)) - end + req = request(path: Routes::PING) + response = request_with_error_handling(req) - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::PingResponse) end def upsert_domain(domain:, ttl:) - request = protobuf_encode!({ domain: domain, ttl: ttl.to_i }, Bbs::Models::UpsertDomainRequest) - - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::UPSERT_DOMAIN), request, PROTOBUF_HEADER) - end + req = request(body: protobuf_encode!({ domain: domain, ttl: ttl.to_i }, Bbs::Models::UpsertDomainRequest), path: Routes::UPSERT_DOMAIN) + response = request_with_error_handling(req) - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::UpsertDomainResponse) end def desire_task(task_definition:, domain:, task_guid:) - request = protobuf_encode!({ task_definition: task_definition, domain: domain, task_guid: task_guid }, Bbs::Models::DesireTaskRequest) - - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::DESIRE_TASK), request, PROTOBUF_HEADER) - end + req = request(body: protobuf_encode!({ task_definition: task_definition, domain: domain, task_guid: task_guid }, Bbs::Models::DesireTaskRequest), path: Routes::DESIRE_TASK) + response = request_with_error_handling(req) - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::TaskLifecycleResponse) end def task_by_guid(task_guid) - request = protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskByGuidRequest) + req = request(body: protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskByGuidRequest), path: Routes::TASK_BY_GUID) + response = request_with_error_handling(req) - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::TASK_BY_GUID), request, PROTOBUF_HEADER) - end - - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::TaskResponse) end def tasks(domain: '', cell_id: '') - request = protobuf_encode!({ domain: domain, cell_id: cell_id }, Bbs::Models::TasksRequest) - - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::LIST_TASKS), request, PROTOBUF_HEADER) - end + req = request(body: protobuf_encode!({ domain: domain, cell_id: cell_id }, Bbs::Models::TasksRequest), path: Routes::LIST_TASKS) + response = request_with_error_handling(req) - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::TasksResponse) end def cancel_task(task_guid) - request = protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskGuidRequest) + req = request(body: protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskGuidRequest), path: Routes::CANCEL_TASK) + response = request_with_error_handling(req) - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::CANCEL_TASK), request, PROTOBUF_HEADER) - end - - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::TaskLifecycleResponse) end def desire_lrp(lrp) - request = protobuf_encode!({ desired_lrp: lrp }, Bbs::Models::DesireLRPRequest) - - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::DESIRE_LRP), request, PROTOBUF_HEADER) - end + req = request(body: protobuf_encode!({ desired_lrp: lrp }, Bbs::Models::DesireLRPRequest), path: Routes::DESIRE_LRP) + response = request_with_error_handling(req) - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::DesiredLRPLifecycleResponse) end def desired_lrp_by_process_guid(process_guid) - request = protobuf_encode!({ process_guid: process_guid }, Bbs::Models::DesiredLRPByProcessGuidRequest) + req = request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::DesiredLRPByProcessGuidRequest), path: Routes::DESIRED_LRP_BY_PROCESS_GUID) + response = request_with_error_handling(req) - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::DESIRED_LRP_BY_PROCESS_GUID), request, PROTOBUF_HEADER) - end - - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::DesiredLRPResponse) end def update_desired_lrp(process_guid, lrp_update) - request = protobuf_encode!({ process_guid: process_guid, update: lrp_update }, Bbs::Models::UpdateDesiredLRPRequest) - - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::UPDATE_DESIRED_LRP), request, PROTOBUF_HEADER) - end + req = request(body: protobuf_encode!({ process_guid: process_guid, update: lrp_update }, Bbs::Models::UpdateDesiredLRPRequest), path: Routes::UPDATE_DESIRED_LRP) + response = request_with_error_handling(req) - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::DesiredLRPLifecycleResponse) end def remove_desired_lrp(process_guid) - request = protobuf_encode!({ process_guid: process_guid }, Bbs::Models::RemoveDesiredLRPRequest) + req = request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::RemoveDesiredLRPRequest), path: Routes::REMOVE_DESIRED_LRP) + response = request_with_error_handling(req) - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::REMOVE_DESIRED_LRP), request, PROTOBUF_HEADER) - end - - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::DesiredLRPLifecycleResponse) end def retire_actual_lrp(actual_lrp_key) - request = protobuf_encode!({ actual_lrp_key: actual_lrp_key }, Bbs::Models::RetireActualLRPRequest) - - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::RETIRE_ACTUAL_LRP), request, PROTOBUF_HEADER) - end + req = request(body: protobuf_encode!({ actual_lrp_key: actual_lrp_key }, Bbs::Models::RetireActualLRPRequest), path: Routes::RETIRE_ACTUAL_LRP) + response = request_with_error_handling(req) - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::ActualLRPLifecycleResponse) end def desired_lrp_scheduling_infos(domain) - request = protobuf_encode!({ domain: domain }, Bbs::Models::DesiredLRPsRequest) + req = request(body: protobuf_encode!({ domain: domain }, Bbs::Models::DesiredLRPsRequest), path: Routes::DESIRED_LRP_SCHEDULING_INFOS) + response = request_with_error_handling(req) - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::DESIRED_LRP_SCHEDULING_INFOS), request, PROTOBUF_HEADER) - end - - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::DesiredLRPSchedulingInfosResponse) end def actual_lrps_by_process_guid(process_guid) - request = protobuf_encode!({ process_guid: process_guid }, Bbs::Models::ActualLRPsRequest) - - response = with_request_error_handling do - client.post(URI.join(bbs_url_from_ip, Routes::ACTUAL_LRPS), request, PROTOBUF_HEADER) - end + req = request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::ActualLRPsRequest), path: Routes::ACTUAL_LRPS) + response = request_with_error_handling(req) - validate_status!(response: response, statuses: [200]) + validate_status!(response) protobuf_decode!(response.body, Bbs::Models::ActualLRPsResponse) end - def with_request_error_handling(&blk) + def request_with_error_handling(req) tries ||= 3 - yield + http_client.ipaddr = bbs_ip # tell the HTTP client which exact IP to target + http_client.request(req) rescue => e ips_remaining.shift retry unless ips_remaining.empty? && (tries -= 1).zero? @@ -178,7 +141,7 @@ def bbs_ip private - attr_reader :client, :bbs_url + attr_reader :http_client, :bbs_url attr_accessor :ips_remaining def protobuf_encode!(hash, protobuf_message_class) @@ -189,8 +152,15 @@ def protobuf_encode!(hash, protobuf_message_class) raise EncodeError.new(e.message) end - def validate_status!(response:, statuses:) - raise ResponseError.new("failed with status: #{response.status}, body: #{response.body}") unless statuses.include?(response.status) + def request(body: nil, path:) + req = Net::HTTP::Post.new(path) + req.body = body if body + req['Content-Type'.freeze] = 'application/x-protobuf'.freeze + req + end + + def validate_status!(response) + raise ResponseError.new("failed with status: #{response.code}, body: #{response.body}") unless response.code == '200' end def protobuf_decode!(message, protobuf_decoder) @@ -200,23 +170,20 @@ def protobuf_decode!(message, protobuf_decoder) end def bbs_ip_addresses - Resolv.getaddresses(URI(bbs_url).host).dup - end - - def bbs_url_from_ip - uri = URI(bbs_url) - uri.host = bbs_ip - uri.to_s + Resolv.getaddresses(bbs_url.host).dup end - def build_client(ca_cert_file, client_cert_file, client_key_file, + def new_http_client(ca_cert_file, client_cert_file, client_key_file, connect_timeout, send_timeout, receive_timeout) - client = HTTPClient.new - client.connect_timeout = connect_timeout - client.send_timeout = send_timeout - client.receive_timeout = receive_timeout - client.ssl_config.set_client_cert_file(client_cert_file, client_key_file) - client.ssl_config.set_trust_ca(ca_cert_file) + client = Net::HTTP.new(bbs_url.host, bbs_url.port) + client.use_ssl = true + client.verify_mode = OpenSSL::SSL::VERIFY_PEER + client.key = OpenSSL::PKey::RSA.new(File.read(client_key_file)) + client.cert = OpenSSL::X509::Certificate.new(File.read(client_cert_file)) + client.ca_file = ca_cert_file + client.open_timeout = connect_timeout + client.read_timeout = receive_timeout + client.write_timeout = send_timeout client end end diff --git a/spec/diego/client_spec.rb b/spec/diego/client_spec.rb index 5b1909a7f81..76e4e921dd6 100644 --- a/spec/diego/client_spec.rb +++ b/spec/diego/client_spec.rb @@ -33,12 +33,12 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/ping").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/ping").to_return(status: response_status, body: response_body) end it 'returns a ping response' do expect(client.ping.available).to be_truthy - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/ping")).to have_been_made + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/ping")).to have_been_made.once end context 'when it does not return successfully' do @@ -52,8 +52,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/ping").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/ping").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/ping").to_raise(StandardError.new('error message')) end it 'raises' do @@ -77,7 +76,7 @@ module Diego let(:ttl) { VCAP::CloudController::Diego::APP_LRP_DOMAIN_TTL } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/domains/upsert").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/domains/upsert").to_return(status: response_status, body: response_body) end it 'returns a domain lifecycle response' do @@ -86,10 +85,10 @@ module Diego response = client.upsert_domain(domain: domain, ttl: ttl) expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/domains/upsert").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/domains/upsert").with( body: Bbs::Models::UpsertDomainRequest.encode(expected_domain_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -103,8 +102,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/domains/upsert").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/domains/upsert").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/domains/upsert").to_raise(StandardError.new('error message')) end it 'raises' do @@ -133,7 +131,7 @@ module Diego let(:task_definition) { Bbs::Models::TaskDefinition.new } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/desire.r2").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/desire.r2").to_return(status: response_status, body: response_body) end it 'returns a task lifecycle response' do @@ -142,10 +140,10 @@ module Diego response = client.desire_task(task_definition: task_definition, task_guid: 'task_guid', domain: 'domain') expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/desire.r2").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/desire.r2").with( body: Bbs::Models::DesireTaskRequest.encode(expected_task_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -161,8 +159,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/desire.r2").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/tasks/desire.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/desire.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -190,7 +187,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/list.r2").to_return(status: response_status, body: response_body) end it 'returns a tasks response' do @@ -199,10 +196,10 @@ module Diego expected_request = Bbs::Models::TasksRequest.new expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/list.r2").with( body: Bbs::Models::TasksRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end describe 'filtering' do @@ -212,10 +209,10 @@ module Diego expected_request = Bbs::Models::TasksRequest.new(domain: 'some-domain') expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/list.r2").with( body: Bbs::Models::TasksRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end it 'filters by cell_id' do @@ -224,10 +221,10 @@ module Diego expected_request = Bbs::Models::TasksRequest.new(cell_id: 'cell-id-thing') expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/list.r2").with( body: Bbs::Models::TasksRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end end @@ -242,8 +239,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/list.r2").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/tasks/list.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/list.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -271,7 +267,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/get_by_task_guid.r2").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/get_by_task_guid.r2").to_return(status: response_status, body: response_body) end it 'returns a task response' do @@ -280,10 +276,10 @@ module Diego expected_request = Bbs::Models::TaskByGuidRequest.new(task_guid: 'some-guid') expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/get_by_task_guid.r2").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/get_by_task_guid.r2").with( body: Bbs::Models::TaskByGuidRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -297,8 +293,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/get_by_task_guid.r2").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/tasks/get_by_task_guid.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/get_by_task_guid.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -326,7 +321,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/cancel").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/cancel").to_return(status: response_status, body: response_body) end it 'returns a task lifecycle response' do @@ -335,10 +330,10 @@ module Diego response = client.cancel_task('some-guid') expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/cancel").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/cancel").with( body: Bbs::Models::TaskGuidRequest.encode(expected_cancel_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -352,8 +347,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/tasks/cancel").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/tasks/cancel").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/tasks/cancel").to_raise(StandardError.new('error message')) end it 'raises' do @@ -382,7 +376,7 @@ module Diego let(:lrp) { ::Diego::Bbs::Models::DesiredLRP.new } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/desire.r2").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/desire.r2").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Lifecycle Response' do @@ -391,10 +385,10 @@ module Diego response = client.desire_lrp(lrp) expect(response).to be_a(Bbs::Models::DesiredLRPLifecycleResponse) expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/desire.r2").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/desire.r2").with( body: Bbs::Models::DesireLRPRequest.encode(expected_desire_lrp_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -408,8 +402,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/desire.r2").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrp/desire.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/desire.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -440,7 +433,7 @@ module Diego let(:process_guid) { 'process-guid' } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrps/get_by_process_guid.r2").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrps/get_by_process_guid.r2").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Response' do @@ -450,10 +443,10 @@ module Diego expect(response).to be_a(Bbs::Models::DesiredLRPResponse) expect(response.error).to be_nil expect(response.desired_lrp).to eq(lrp) - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrps/get_by_process_guid.r2").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrps/get_by_process_guid.r2").with( body: Bbs::Models::DesiredLRPByProcessGuidRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -469,8 +462,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrps/get_by_process_guid.r2").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrps/get_by_process_guid.r2").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrps/get_by_process_guid.r2").to_raise(StandardError.new('error message')) end it 'raises' do @@ -499,7 +491,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/remove").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/remove").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Lifecycle Response' do @@ -508,10 +500,10 @@ module Diego response = client.remove_desired_lrp(process_guid) expect(response).to be_a(Bbs::Models::DesiredLRPLifecycleResponse) expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/remove").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/remove").with( body: Bbs::Models::RemoveDesiredLRPRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -525,8 +517,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/remove").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrp/remove").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/remove").to_raise(StandardError.new('error message')) end it 'raises' do @@ -555,7 +546,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/retire").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/actual_lrps/retire").to_return(status: response_status, body: response_body) end it 'returns an Actual LRP Lifecycle Response' do @@ -564,10 +555,10 @@ module Diego response = client.retire_actual_lrp(actual_lrp_key) expect(response).to be_a(Bbs::Models::ActualLRPLifecycleResponse) expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/retire").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/actual_lrps/retire").with( body: Bbs::Models::RetireActualLRPRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -583,8 +574,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/retire").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/actual_lrps/retire").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/actual_lrps/retire").to_raise(StandardError.new('error message')) end it 'raises' do @@ -615,7 +605,7 @@ module Diego let(:response_status) { 200 } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/update").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/update").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Lifecycle Response' do @@ -624,10 +614,10 @@ module Diego response = client.update_desired_lrp(process_guid, lrp_update) expect(response).to be_a(Bbs::Models::DesiredLRPLifecycleResponse) expect(response.error).to be_nil - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/update").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/update").with( body: Bbs::Models::UpdateDesiredLRPRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -641,8 +631,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp/update").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrp/update").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp/update").to_raise(StandardError.new('error message')) end it 'raises' do @@ -676,7 +665,7 @@ module Diego let(:actual_lrps) { [::Diego::Bbs::Models::ActualLRP.new] } let(:response_status) { 200 } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/list").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/actual_lrps/list").to_return(status: response_status, body: response_body) end it 'returns a LRP instances response' do @@ -686,10 +675,10 @@ module Diego expect(response).to be_a(Bbs::Models::ActualLRPsResponse) expect(response.error).to be_nil expect(response.actual_lrps).to eq(actual_lrps) - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/actual_lrps/list").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/actual_lrps/list").with( body: Bbs::Models::ActualLRPsRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end end @@ -703,7 +692,7 @@ module Diego let(:domain) { 'domain' } before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp_scheduling_infos/list").to_return(status: response_status, body: response_body) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp_scheduling_infos/list").to_return(status: response_status, body: response_body) end it 'returns a Desired LRP Scheduling Infos Response' do @@ -713,10 +702,10 @@ module Diego expect(response).to be_a(Bbs::Models::DesiredLRPSchedulingInfosResponse) expect(response.error).to be_nil expect(response.desired_lrp_scheduling_infos).to eq(scheduling_infos) - expect(a_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp_scheduling_infos/list").with( + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp_scheduling_infos/list").with( body: Bbs::Models::DesiredLRPsRequest.encode(expected_request).to_s, headers: { 'Content-Type' => 'application/x-protobuf' } - )).to have_been_made + )).to have_been_made.once end context 'when it does not return successfully' do @@ -732,8 +721,7 @@ module Diego context 'when it fails to make the request' do before do - stub_request(:post, "https://#{bbs_ip_1}:4443/v1/desired_lrp_scheduling_infos/list").to_raise(StandardError.new('error message')) - stub_request(:post, "https://#{bbs_ip_2}:4443/v1/desired_lrp_scheduling_infos/list").to_raise(StandardError.new('error message')) + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/v1/desired_lrp_scheduling_infos/list").to_raise(StandardError.new('error message')) end it 'raises' do @@ -758,25 +746,57 @@ module Diego end end - describe '#with_request_error_handling' do - it 'retries three times for each IP that the bbs domain resolves to and then errors' do - (1..3).each do |num_ips| - allow(::Resolv).to receive(:getaddresses).with(bbs_domain).and_return(Array.new(num_ips) { random_ip }) - call_count = 0 + describe '#request_with_error_handling' do + let(:http_client) { Net::HTTP.new(bbs_domain, bbs_port) } + + before do + allow(http_client).to receive(:ipaddr=).with(bbs_ip_1) + allow(http_client).to receive(:ipaddr=).with(bbs_ip_2) + allow(Net::HTTP).to receive(:new).and_return(http_client) + end + + context 'when all requests fail' do + before do + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/fake_path").to_raise(StandardError.new('error message')) + end + + it 'makes three attempts for each IP before raising an error' do expect { - client.with_request_error_handling do - call_count += 1 - _ = client.bbs_ip # stores IPs an instance variable and re-resolves when its empty - raise 'error' - end + client.request_with_error_handling(Net::HTTP::Post.new('/fake_path')) }.to raise_error(RequestError) - expect(call_count).to eq(num_ips * 3) + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/fake_path")).to have_been_made.times(6) + expect(http_client).to have_received(:ipaddr=).with(bbs_ip_1).exactly(3).times + expect(http_client).to have_received(:ipaddr=).with(bbs_ip_2).exactly(3).times end end - end - def random_ip - Array.new(4) { rand(256) }.join('.') + context 'when the first request succeeds' do + before do + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/fake_path").to_return(status: 200) + allow_any_instance_of(Net::HTTP).to receive(:ipaddr=).with(bbs_ip_1) + end + + it 'makes one attempt for the first IP' do + client.request_with_error_handling(Net::HTTP::Post.new('/fake_path')) + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/fake_path")).to have_been_made.once + expect(http_client).to have_received(:ipaddr=).with(bbs_ip_1).once + end + end + + context 'when the first four requests fail and the fifth succeeds' do + before do + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/fake_path"). + to_raise(StandardError.new('error message')).times(4).then. + to_return(status: 200) + end + + it 'makes five attempts' do + client.request_with_error_handling(Net::HTTP::Post.new('/fake_path')) + expect(a_request(:post, "https://#{bbs_domain}:#{bbs_port}/fake_path")).to have_been_made.times(5) + expect(http_client).to have_received(:ipaddr=).with(bbs_ip_1).exactly(3).times + expect(http_client).to have_received(:ipaddr=).with(bbs_ip_2).twice + end + end end end end diff --git a/spec/request/processes_spec.rb b/spec/request/processes_spec.rb index 7a766fce183..2ea5df213c7 100644 --- a/spec/request/processes_spec.rb +++ b/spec/request/processes_spec.rb @@ -11,7 +11,7 @@ let(:user) { VCAP::CloudController::User.make } let(:admin_header) { admin_headers_for(user) } let(:user_name) { 'ProcHudson' } - let(:build_client) { instance_double(HTTPClient, post: nil) } + let(:build_client) { instance_double(Net::HTTP, request: nil) } let(:metadata) do { labels: { @@ -22,7 +22,9 @@ } end before do - allow_any_instance_of(::Diego::Client).to receive(:build_client).and_return(build_client) + allow(build_client).to receive(:ipaddr=).and_return('1.2.3.4') + allow(::Resolv).to receive(:getaddresses).and_return(['1.2.3.4']) + allow_any_instance_of(::Diego::Client).to receive(:new_http_client).and_return(build_client) end describe 'GET /v3/processes' do diff --git a/spec/request/v2/apps_spec.rb b/spec/request/v2/apps_spec.rb index 2c706fe63a1..7a052fede89 100644 --- a/spec/request/v2/apps_spec.rb +++ b/spec/request/v2/apps_spec.rb @@ -4,13 +4,15 @@ RSpec.describe 'Apps' do let(:user) { VCAP::CloudController::User.make } let(:space) { VCAP::CloudController::Space.make } - let(:build_client) { instance_double(HTTPClient, post: nil) } + let(:build_client) { instance_double(Net::HTTP, request: nil) } before do space.organization.add_user(user) space.add_developer(user) TestConfig.override(kubernetes: {}) - allow_any_instance_of(::Diego::Client).to receive(:build_client).and_return(build_client) + allow(build_client).to receive(:ipaddr=).and_return('1.2.3.4') + allow(::Resolv).to receive(:getaddresses).and_return(['1.2.3.4']) + allow_any_instance_of(::Diego::Client).to receive(:new_http_client).and_return(build_client) end describe 'GET /v2/apps' do diff --git a/spec/request/v2/spaces_spec.rb b/spec/request/v2/spaces_spec.rb index 713af2d6554..3d63cf212bd 100644 --- a/spec/request/v2/spaces_spec.rb +++ b/spec/request/v2/spaces_spec.rb @@ -258,12 +258,14 @@ let(:maintenance_info) { { version: '1.0.0', desciption: 'this is description about the maintenance' } } let!(:service_plan) { VCAP::CloudController::ServicePlan.make(maintenance_info: maintenance_info) } let!(:service_instance) { VCAP::CloudController::ManagedServiceInstance.make(space: space, service_plan: service_plan, maintenance_info: maintenance_info) } - let(:build_client) { instance_double(HTTPClient, post: nil) } + let(:build_client) { instance_double(Net::HTTP, request: nil) } before do space.organization.add_user(user) space.add_developer(user) - allow_any_instance_of(::Diego::Client).to receive(:build_client).and_return(build_client) + allow(build_client).to receive(:ipaddr=).and_return('1.2.3.4') + allow(::Resolv).to receive(:getaddresses).and_return(['1.2.3.4']) + allow_any_instance_of(::Diego::Client).to receive(:new_http_client).and_return(build_client) end it 'returns the space summary' do From a21a76bd1b16c7f735197e389389c520286b3eb0 Mon Sep 17 00:00:00 2001 From: Will Gant Date: Tue, 8 Nov 2022 12:59:58 +0000 Subject: [PATCH 3/4] Add some logging to the diego client --- lib/diego/client.rb | 12 +++++++++--- spec/diego/client_spec.rb | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/lib/diego/client.rb b/lib/diego/client.rb index c0348e5585a..4a3836dd62d 100644 --- a/lib/diego/client.rb +++ b/lib/diego/client.rb @@ -125,12 +125,14 @@ def actual_lrps_by_process_guid(process_guid) end def request_with_error_handling(req) - tries ||= 3 + attempt ||= 1 http_client.ipaddr = bbs_ip # tell the HTTP client which exact IP to target + logger.info("attempt #{attempt}: trying bbs endpoint #{req.path} on #{bbs_ip}") http_client.request(req) rescue => e - ips_remaining.shift - retry unless ips_remaining.empty? && (tries -= 1).zero? + eliminated_ip = ips_remaining.shift + logger.info("attempt #{attempt}: failed to reach bbs server on #{eliminated_ip}, removing from list") + retry unless ips_remaining.empty? && (attempt += 1) > 3 raise RequestError.new(e.message) end @@ -144,6 +146,10 @@ def bbs_ip attr_reader :http_client, :bbs_url attr_accessor :ips_remaining + def logger + @logger ||= Steno.logger('cc.diego.client') + end + def protobuf_encode!(hash, protobuf_message_class) # See below link to understand proto3 message encoding # https://developers.google.com/protocol-buffers/docs/reference/ruby-generated#message diff --git a/spec/diego/client_spec.rb b/spec/diego/client_spec.rb index 76e4e921dd6..6fa7f42679d 100644 --- a/spec/diego/client_spec.rb +++ b/spec/diego/client_spec.rb @@ -11,6 +11,7 @@ module Diego let(:client_key_file) { File.join(Paths::FIXTURES, 'certs/bbs_client.key') } let(:bbs_ip_1) { '1.2.3.4' } let(:bbs_ip_2) { '5.6.7.8' } + let(:logger) { instance_double(Steno::Logger) } subject(:client) do Client.new(url: bbs_uri, ca_cert_file: ca_cert_file, client_cert_file: client_cert_file, client_key_file: client_key_file, @@ -18,6 +19,8 @@ module Diego end before do allow(::Resolv).to receive(:getaddresses).with(bbs_domain).and_return([bbs_ip_1, bbs_ip_2]) + allow(Steno).to receive(:logger).and_return(logger) + allow(logger).to receive(:info) end describe 'configuration' do @@ -797,6 +800,21 @@ module Diego expect(http_client).to have_received(:ipaddr=).with(bbs_ip_2).twice end end + + context 'logging' do + before do + stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/fake_path"). + to_raise(StandardError.new('error message')).then. + to_return(status: 200) + end + + it 'logs before each request and after each failed request' do + client.request_with_error_handling(Net::HTTP::Post.new('/fake_path')) + expect(logger).to have_received(:info).with(%r{attempt 1: trying bbs endpoint /fake_path on #{bbs_ip_1}}).once + expect(logger).to have_received(:info).with(/attempt 1: failed to reach bbs server on #{bbs_ip_1}, removing from list/).once + expect(logger).to have_received(:info).with(%r{attempt 1: trying bbs endpoint /fake_path on #{bbs_ip_2}}).once + end + end end end end From dce76a99bbd03204e2c75a8abd8c71298bf49e65 Mon Sep 17 00:00:00 2001 From: Will Gant Date: Thu, 10 Nov 2022 15:46:40 +0000 Subject: [PATCH 4/4] Address PR feedback --- lib/diego/client.rb | 34 ++++++++++++++++++---------------- lib/diego/errors.rb | 3 +++ spec/diego/client_spec.rb | 33 +++++++++++++++++++++++++++------ 3 files changed, 48 insertions(+), 22 deletions(-) diff --git a/lib/diego/client.rb b/lib/diego/client.rb index 4a3836dd62d..747be4e363c 100644 --- a/lib/diego/client.rb +++ b/lib/diego/client.rb @@ -21,7 +21,7 @@ def initialize(url:, ca_cert_file:, client_cert_file:, client_key_file:, end def ping - req = request(path: Routes::PING) + req = post_request(path: Routes::PING) response = request_with_error_handling(req) validate_status!(response) @@ -29,7 +29,7 @@ def ping end def upsert_domain(domain:, ttl:) - req = request(body: protobuf_encode!({ domain: domain, ttl: ttl.to_i }, Bbs::Models::UpsertDomainRequest), path: Routes::UPSERT_DOMAIN) + req = post_request(body: protobuf_encode!({ domain: domain, ttl: ttl.to_i }, Bbs::Models::UpsertDomainRequest), path: Routes::UPSERT_DOMAIN) response = request_with_error_handling(req) validate_status!(response) @@ -37,7 +37,8 @@ def upsert_domain(domain:, ttl:) end def desire_task(task_definition:, domain:, task_guid:) - req = request(body: protobuf_encode!({ task_definition: task_definition, domain: domain, task_guid: task_guid }, Bbs::Models::DesireTaskRequest), path: Routes::DESIRE_TASK) + req = post_request(body: protobuf_encode!({ task_definition: task_definition, domain: domain, task_guid: task_guid }, Bbs::Models::DesireTaskRequest), +path: Routes::DESIRE_TASK) response = request_with_error_handling(req) validate_status!(response) @@ -45,7 +46,7 @@ def desire_task(task_definition:, domain:, task_guid:) end def task_by_guid(task_guid) - req = request(body: protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskByGuidRequest), path: Routes::TASK_BY_GUID) + req = post_request(body: protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskByGuidRequest), path: Routes::TASK_BY_GUID) response = request_with_error_handling(req) validate_status!(response) @@ -53,7 +54,7 @@ def task_by_guid(task_guid) end def tasks(domain: '', cell_id: '') - req = request(body: protobuf_encode!({ domain: domain, cell_id: cell_id }, Bbs::Models::TasksRequest), path: Routes::LIST_TASKS) + req = post_request(body: protobuf_encode!({ domain: domain, cell_id: cell_id }, Bbs::Models::TasksRequest), path: Routes::LIST_TASKS) response = request_with_error_handling(req) validate_status!(response) @@ -61,7 +62,7 @@ def tasks(domain: '', cell_id: '') end def cancel_task(task_guid) - req = request(body: protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskGuidRequest), path: Routes::CANCEL_TASK) + req = post_request(body: protobuf_encode!({ task_guid: task_guid }, Bbs::Models::TaskGuidRequest), path: Routes::CANCEL_TASK) response = request_with_error_handling(req) validate_status!(response) @@ -69,7 +70,7 @@ def cancel_task(task_guid) end def desire_lrp(lrp) - req = request(body: protobuf_encode!({ desired_lrp: lrp }, Bbs::Models::DesireLRPRequest), path: Routes::DESIRE_LRP) + req = post_request(body: protobuf_encode!({ desired_lrp: lrp }, Bbs::Models::DesireLRPRequest), path: Routes::DESIRE_LRP) response = request_with_error_handling(req) validate_status!(response) @@ -77,7 +78,7 @@ def desire_lrp(lrp) end def desired_lrp_by_process_guid(process_guid) - req = request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::DesiredLRPByProcessGuidRequest), path: Routes::DESIRED_LRP_BY_PROCESS_GUID) + req = post_request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::DesiredLRPByProcessGuidRequest), path: Routes::DESIRED_LRP_BY_PROCESS_GUID) response = request_with_error_handling(req) validate_status!(response) @@ -85,7 +86,7 @@ def desired_lrp_by_process_guid(process_guid) end def update_desired_lrp(process_guid, lrp_update) - req = request(body: protobuf_encode!({ process_guid: process_guid, update: lrp_update }, Bbs::Models::UpdateDesiredLRPRequest), path: Routes::UPDATE_DESIRED_LRP) + req = post_request(body: protobuf_encode!({ process_guid: process_guid, update: lrp_update }, Bbs::Models::UpdateDesiredLRPRequest), path: Routes::UPDATE_DESIRED_LRP) response = request_with_error_handling(req) validate_status!(response) @@ -93,7 +94,7 @@ def update_desired_lrp(process_guid, lrp_update) end def remove_desired_lrp(process_guid) - req = request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::RemoveDesiredLRPRequest), path: Routes::REMOVE_DESIRED_LRP) + req = post_request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::RemoveDesiredLRPRequest), path: Routes::REMOVE_DESIRED_LRP) response = request_with_error_handling(req) validate_status!(response) @@ -101,7 +102,7 @@ def remove_desired_lrp(process_guid) end def retire_actual_lrp(actual_lrp_key) - req = request(body: protobuf_encode!({ actual_lrp_key: actual_lrp_key }, Bbs::Models::RetireActualLRPRequest), path: Routes::RETIRE_ACTUAL_LRP) + req = post_request(body: protobuf_encode!({ actual_lrp_key: actual_lrp_key }, Bbs::Models::RetireActualLRPRequest), path: Routes::RETIRE_ACTUAL_LRP) response = request_with_error_handling(req) validate_status!(response) @@ -109,7 +110,7 @@ def retire_actual_lrp(actual_lrp_key) end def desired_lrp_scheduling_infos(domain) - req = request(body: protobuf_encode!({ domain: domain }, Bbs::Models::DesiredLRPsRequest), path: Routes::DESIRED_LRP_SCHEDULING_INFOS) + req = post_request(body: protobuf_encode!({ domain: domain }, Bbs::Models::DesiredLRPsRequest), path: Routes::DESIRED_LRP_SCHEDULING_INFOS) response = request_with_error_handling(req) validate_status!(response) @@ -117,7 +118,7 @@ def desired_lrp_scheduling_infos(domain) end def actual_lrps_by_process_guid(process_guid) - req = request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::ActualLRPsRequest), path: Routes::ACTUAL_LRPS) + req = post_request(body: protobuf_encode!({ process_guid: process_guid }, Bbs::Models::ActualLRPsRequest), path: Routes::ACTUAL_LRPS) response = request_with_error_handling(req) validate_status!(response) @@ -127,11 +128,12 @@ def actual_lrps_by_process_guid(process_guid) def request_with_error_handling(req) attempt ||= 1 http_client.ipaddr = bbs_ip # tell the HTTP client which exact IP to target - logger.info("attempt #{attempt}: trying bbs endpoint #{req.path} on #{bbs_ip}") http_client.request(req) + rescue Resolv::ResolvError, Resolv::ResolvTimeout => e + raise DnsResolutionError.new("dns resolution failed for #{bbs_url.host}: #{e.message}") rescue => e eliminated_ip = ips_remaining.shift - logger.info("attempt #{attempt}: failed to reach bbs server on #{eliminated_ip}, removing from list") + logger.debug("attempt #{attempt} of 3: failed to reach the active bbs server on #{eliminated_ip}, removing from list") retry unless ips_remaining.empty? && (attempt += 1) > 3 raise RequestError.new(e.message) end @@ -158,7 +160,7 @@ def protobuf_encode!(hash, protobuf_message_class) raise EncodeError.new(e.message) end - def request(body: nil, path:) + def post_request(body: nil, path:) req = Net::HTTP::Post.new(path) req.body = body if body req['Content-Type'.freeze] = 'application/x-protobuf'.freeze diff --git a/lib/diego/errors.rb b/lib/diego/errors.rb index cad25251d71..1275a7e0405 100644 --- a/lib/diego/errors.rb +++ b/lib/diego/errors.rb @@ -13,4 +13,7 @@ class DecodeError < Error class EncodeError < Error end + + class DnsResolutionError < Error + end end diff --git a/spec/diego/client_spec.rb b/spec/diego/client_spec.rb index 6fa7f42679d..fa2b94dc46e 100644 --- a/spec/diego/client_spec.rb +++ b/spec/diego/client_spec.rb @@ -18,9 +18,9 @@ module Diego connect_timeout: 10, send_timeout: 10, receive_timeout: 10) end before do - allow(::Resolv).to receive(:getaddresses).with(bbs_domain).and_return([bbs_ip_1, bbs_ip_2]) + allow(Resolv).to receive(:getaddresses).with(bbs_domain).and_return([bbs_ip_1, bbs_ip_2]) allow(Steno).to receive(:logger).and_return(logger) - allow(logger).to receive(:info) + allow(logger).to receive(:debug) end describe 'configuration' do @@ -801,18 +801,39 @@ module Diego end end + context 'when DNS resolution times out' do + before do + allow(Resolv).to receive(:getaddresses).and_raise(Resolv::ResolvTimeout.new('soz, you timed out')) + end + + it 'raises an error' do + expect { client.request_with_error_handling(Net::HTTP::Post.new('/fake_path')) }. + to raise_error(DnsResolutionError, /dns resolution failed for #{bbs_domain}: soz, you timed out/) + end + end + + context 'when we reach the DNS server but the host fails to resolve' do + before do + allow(Resolv).to receive(:getaddresses).and_raise(Resolv::ResolvError.new('your domain sucks')) + end + + it 'raises an error' do + expect { client.request_with_error_handling(Net::HTTP::Post.new('/fake_path')) }. + to raise_error(DnsResolutionError, /dns resolution failed for #{bbs_domain}: your domain sucks/) + end + end + context 'logging' do before do stub_request(:post, "https://#{bbs_domain}:#{bbs_port}/fake_path"). to_raise(StandardError.new('error message')).then. to_return(status: 200) + allow(http_client).to receive(:ipaddr=).and_call_original end - it 'logs before each request and after each failed request' do + it 'emits a debug log after each failed request' do client.request_with_error_handling(Net::HTTP::Post.new('/fake_path')) - expect(logger).to have_received(:info).with(%r{attempt 1: trying bbs endpoint /fake_path on #{bbs_ip_1}}).once - expect(logger).to have_received(:info).with(/attempt 1: failed to reach bbs server on #{bbs_ip_1}, removing from list/).once - expect(logger).to have_received(:info).with(%r{attempt 1: trying bbs endpoint /fake_path on #{bbs_ip_2}}).once + expect(logger).to have_received(:debug).with(/attempt 1 of 3: failed to reach the active bbs server on #{bbs_ip_1}, removing from list/).once end end end