From e274d8697b8d377d40f73bcde5a5aca721546740 Mon Sep 17 00:00:00 2001 From: Nick Muerdter <12112+GUI@users.noreply.github.com> Date: Sun, 10 Mar 2019 13:00:44 -0600 Subject: [PATCH] Fix some edge-cases in rapid reloads causing config to go missing. This is likely only an issue in our test suite, but if sending rapid reload commands to the api-umbrella process, the TrafficServer config could go missing for a short period of time, which could lead to requests not being routed. This fixes it by only changing the config files if they actually differ (to prevent unnecessary reloads by TrafficServer), and then also ensuring that all config files are fully written to disk before being moved into place (so processes don't ever pick up on partially written config files). --- src/api-umbrella/cli/setup.lua | 41 +++++++++++++++---- .../api_umbrella_test_helpers/process.rb | 22 ++++++++-- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/src/api-umbrella/cli/setup.lua b/src/api-umbrella/cli/setup.lua index 7151a1d33..b5846ae21 100644 --- a/src/api-umbrella/cli/setup.lua +++ b/src/api-umbrella/cli/setup.lua @@ -10,6 +10,7 @@ local run_command = require "api-umbrella.utils.run_command" local stat = require "posix.sys.stat" local tablex = require "pl.tablex" local unistd = require "posix.unistd" +local xpcall_error_handler = require "api-umbrella.utils.xpcall_error_handler" local chmod = stat.chmod local chown = unistd.chown @@ -142,6 +143,18 @@ local function ensure_geoip_db() end end +local function set_template_permissions(file_path, install_filename) + if config["group"] then + chown(file_path, nil, config["group"]) + end + + if install_filename == "rc.log" or install_filename == "rc.main" or install_filename == "rc.perp" then + chmod(file_path, tonumber("0750", 8)) + else + chmod(file_path, tonumber("0640", 8)) + end +end + local function write_templates() local template_root = path.join(config["_src_root_dir"], "templates/etc") for root, _, files in dir.walk(template_root) do @@ -176,17 +189,27 @@ local function write_templates() content = lustache:render(mustache_unescape(content), config) end - dir.makepath(path.dirname(install_path)) - file.write(install_path, content) - if config["group"] then - chown(install_path, nil, config["group"]) - end - local install_filename = path.basename(install_path) - if install_filename == "rc.log" or install_filename == "rc.main" or install_filename == "rc.perp" then - chmod(install_path, tonumber("0750", 8)) + + -- Only write the file if it differs from the existing file. This helps + -- prevents some processes, like Trafficserver, from thinking there are + -- config file updates to process on reloads if the file timestamps + -- change (even if there aren't actually any changes). + local _, existing_content = xpcall(file.read, xpcall_error_handler, install_path, true) + if content ~= existing_content then + -- Write the config file in an atomic fashion (by writing to a temp + -- file and then moving into place), so that during reloads the + -- processes never read a half-written file. + local install_dir = path.dirname(install_path) + local temp_path = path.tmpname() + file.write(temp_path, "") + set_template_permissions(temp_path, install_filename) + file.write(temp_path, content) + + dir.makepath(install_dir) + file.move(temp_path, install_path) else - chmod(install_path, tonumber("0640", 8)) + set_template_permissions(install_path, install_filename) end end end diff --git a/test/support/api_umbrella_test_helpers/process.rb b/test/support/api_umbrella_test_helpers/process.rb index 9374b5928..4efea8925 100644 --- a/test/support/api_umbrella_test_helpers/process.rb +++ b/test/support/api_umbrella_test_helpers/process.rb @@ -242,11 +242,25 @@ def perp_signal(service_name, signal_name) end def perp_pid(service_name) - output, _status = run_shell("perpstat", "-b", File.join($config["root_dir"], "etc/perp"), service_name) - matches = output.match(/^\s*main: up .*\(pid (\d+)\)\s*$/) pid = nil - if matches - pid = matches[1] + begin + # If api-umbrella is actively being reloaded, then the "perphup" signal + # sent to perp may temporarily result in perpstat not thinking services + # are activated until the reload has finished (so no pids will be + # returned). So retry fetching the pids for a while to account for this + # timing edge-case while reloads are being tested. + Timeout.timeout(5) do + loop do + output, _status = run_shell("perpstat", "-b", File.join($config["root_dir"], "etc/perp"), service_name) + matches = output.match(/^\s*main: up .*\(pid (\d+)\)\s*$/) + if matches + pid = matches[1] + break + end + end + end + rescue Timeout::Error # rubocop:disable Lint/HandleExceptions + # Ignore and return nil pid. end pid