Skip to content

Commit

Permalink
Fix some edge-cases in rapid reloads causing config to go missing.
Browse files Browse the repository at this point in the history
This is likely only an issue in our test suite, but if sending rapid
reload commands to the api-umbrella process, the TrafficServer config
could go missing for a short period of time, which could lead to
requests not being routed. This fixes it by only changing the config
files if they actually differ (to prevent unnecessary reloads by
TrafficServer), and then also ensuring that all config files are fully
written to disk before being moved into place (so processes don't ever
pick up on partially written config files).
  • Loading branch information
GUI committed Mar 10, 2019
1 parent 56a2367 commit e274d86
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 13 deletions.
41 changes: 32 additions & 9 deletions src/api-umbrella/cli/setup.lua
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ local run_command = require "api-umbrella.utils.run_command"
local stat = require "posix.sys.stat"
local tablex = require "pl.tablex"
local unistd = require "posix.unistd"
local xpcall_error_handler = require "api-umbrella.utils.xpcall_error_handler"

local chmod = stat.chmod
local chown = unistd.chown
Expand Down Expand Up @@ -142,6 +143,18 @@ local function ensure_geoip_db()
end
end

local function set_template_permissions(file_path, install_filename)
if config["group"] then
chown(file_path, nil, config["group"])
end

if install_filename == "rc.log" or install_filename == "rc.main" or install_filename == "rc.perp" then
chmod(file_path, tonumber("0750", 8))
else
chmod(file_path, tonumber("0640", 8))
end
end

local function write_templates()
local template_root = path.join(config["_src_root_dir"], "templates/etc")
for root, _, files in dir.walk(template_root) do
Expand Down Expand Up @@ -176,17 +189,27 @@ local function write_templates()
content = lustache:render(mustache_unescape(content), config)
end

dir.makepath(path.dirname(install_path))
file.write(install_path, content)
if config["group"] then
chown(install_path, nil, config["group"])
end

local install_filename = path.basename(install_path)
if install_filename == "rc.log" or install_filename == "rc.main" or install_filename == "rc.perp" then
chmod(install_path, tonumber("0750", 8))

-- Only write the file if it differs from the existing file. This helps
-- prevents some processes, like Trafficserver, from thinking there are
-- config file updates to process on reloads if the file timestamps
-- change (even if there aren't actually any changes).
local _, existing_content = xpcall(file.read, xpcall_error_handler, install_path, true)
if content ~= existing_content then
-- Write the config file in an atomic fashion (by writing to a temp
-- file and then moving into place), so that during reloads the
-- processes never read a half-written file.
local install_dir = path.dirname(install_path)
local temp_path = path.tmpname()
file.write(temp_path, "")
set_template_permissions(temp_path, install_filename)
file.write(temp_path, content)

dir.makepath(install_dir)
file.move(temp_path, install_path)
else
chmod(install_path, tonumber("0640", 8))
set_template_permissions(install_path, install_filename)
end
end
end
Expand Down
22 changes: 18 additions & 4 deletions test/support/api_umbrella_test_helpers/process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,25 @@ def perp_signal(service_name, signal_name)
end

def perp_pid(service_name)
output, _status = run_shell("perpstat", "-b", File.join($config["root_dir"], "etc/perp"), service_name)
matches = output.match(/^\s*main: up .*\(pid (\d+)\)\s*$/)
pid = nil
if matches
pid = matches[1]
begin
# If api-umbrella is actively being reloaded, then the "perphup" signal
# sent to perp may temporarily result in perpstat not thinking services
# are activated until the reload has finished (so no pids will be
# returned). So retry fetching the pids for a while to account for this
# timing edge-case while reloads are being tested.
Timeout.timeout(5) do
loop do
output, _status = run_shell("perpstat", "-b", File.join($config["root_dir"], "etc/perp"), service_name)
matches = output.match(/^\s*main: up .*\(pid (\d+)\)\s*$/)
if matches
pid = matches[1]
break
end
end
end
rescue Timeout::Error # rubocop:disable Lint/HandleExceptions
# Ignore and return nil pid.
end

pid
Expand Down

0 comments on commit e274d86

Please sign in to comment.