Skip to content

Commit

Permalink
Improve safety and error handling of setting shared dict data.
Browse files Browse the repository at this point in the history
See 18F/api.data.gov#385

This replaces "set" calls with "safe_set" for the "active_config" shared
dict to prevent the possibility of publishing new API configuration
removing old API configuration when it exceeds the available memory
allocated for the shared dict. We also handle the possibility of
"safe_set" still unpublishing the old configuration when it exceeds the
available memory, which should ensure that the full API configuration
never gets unpublished.

This also adds more explicit error and warning logging to all the shared
dict "set" calls.
  • Loading branch information
GUI committed Aug 11, 2018
1 parent 00b6d7b commit cb5e2c1
Show file tree
Hide file tree
Showing 14 changed files with 310 additions and 44 deletions.
11 changes: 7 additions & 4 deletions src/api-umbrella/proxy/hooks/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ local incr_err
WORKER_GROUP_ID, incr_err = ngx.shared.active_config:incr("worker_group_id", 1)
if incr_err == "not found" then
WORKER_GROUP_ID = 1
local success, set_err = ngx.shared.active_config:set("worker_group_id", 1)
if not success then
ngx.log(ngx.ERR, "worker_group_id set err: ", set_err)
local set_ok, set_err = ngx.shared.active_config:safe_set("worker_group_id", 1)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'worker_group_id' in 'active_config' shared dict: ", set_err)
return
end
elseif incr_err then
Expand All @@ -23,4 +23,7 @@ end

ngx.shared.stats:delete("distributed_last_fetched_at")
ngx.shared.api_users:delete("last_fetched_at")
ngx.shared.active_config:set("elasticsearch_templates_created", false)
local set_ok, set_err = ngx.shared.active_config:safe_set("elasticsearch_templates_created", false)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'elasticsearch_templates_created' in 'active_config' shared dict: ", set_err)
end
1 change: 1 addition & 0 deletions src/api-umbrella/proxy/hooks/init_preload_modules.lua
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ require "api-umbrella.utils.mongo"
require "api-umbrella.utils.mustache_unescape"
require "api-umbrella.utils.nillify_json_nulls"
require "api-umbrella.utils.nillify_yaml_nulls"
require "api-umbrella.utils.packed_shared_dict"
require "api-umbrella.utils.random_num"
require "api-umbrella.utils.random_seed"
require "api-umbrella.utils.random_token"
Expand Down
4 changes: 2 additions & 2 deletions src/api-umbrella/proxy/hooks/rewrite.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ local api_matcher = require "api-umbrella.proxy.middleware.api_matcher"
local config = require "api-umbrella.proxy.models.file_config"
local error_handler = require "api-umbrella.proxy.error_handler"
local host_normalize = require "api-umbrella.utils.host_normalize"
local packed_shared_dict = require "api-umbrella.utils.packed_shared_dict"
local redirect_matches_to_https = require "api-umbrella.utils.redirect_matches_to_https"
local utils = require "api-umbrella.proxy.utils"
local website_matcher = require "api-umbrella.proxy.middleware.website_matcher"

local get_packed = utils.get_packed
local get_packed = packed_shared_dict.get_packed
local ngx_var = ngx.var

-- Determine the protocol/scheme and port this connection represents, based on
Expand Down
30 changes: 21 additions & 9 deletions src/api-umbrella/proxy/jobs/distributed_rate_limit_puller.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ local _M = {}

local interval_lock = require "api-umbrella.utils.interval_lock"
local mongo = require "api-umbrella.utils.mongo"
local packed_shared_dict = require "api-umbrella.utils.packed_shared_dict"
local types = require "pl.types"
local utils = require "api-umbrella.proxy.utils"

local get_packed = utils.get_packed
local get_packed = packed_shared_dict.get_packed
local is_empty = types.is_empty
local set_packed = utils.set_packed
local set_packed = packed_shared_dict.set_packed

local delay = 0.25 -- in seconds

Expand Down Expand Up @@ -39,7 +39,12 @@ local function do_check()
for index, result in ipairs(results) do
if skip == 0 and index == 1 then
if result["ts"] and result["ts"]["$timestamp"] then
set_packed(ngx.shared.stats, "distributed_last_fetched_timestamp", result["ts"]["$timestamp"])
local set_ok, set_err, set_forcible = set_packed(ngx.shared.stats, "distributed_last_fetched_timestamp", result["ts"]["$timestamp"])
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'distributed_last_fetched_timestamp' in 'stats' shared dict: ", set_err)
elseif set_forcible then
ngx.log(ngx.WARN, "forcibly set 'distributed_last_fetched_timestamp' in 'stats' shared dict (shared dict may be too small)")
end
end
end

Expand All @@ -54,16 +59,18 @@ local function do_check()
ttl = 3600
end

local _, set_err = ngx.shared.stats:set(key, distributed_count, ttl)
if set_err then
ngx.log(ngx.ERR, "failed to set rate limit key", set_err)
local set_ok, set_err, set_forcible = ngx.shared.stats:set(key, distributed_count, ttl)
if not set_ok then
ngx.log(ngx.ERR, "failed to set rate limit key in 'stats' shared dict: ", set_err)
elseif set_forcible then
ngx.log(ngx.WARN, "forcibly set rate limit key in 'stats' shared dict (shared dict may be too small)")
end
end
elseif distributed_count > local_count then
local incr = distributed_count - local_count
local _, incr_err = ngx.shared.stats:incr(key, incr)
if incr_err then
ngx.log(ngx.ERR, "failed to increment rate limit key", incr_err)
ngx.log(ngx.ERR, "failed to increment rate limit key: ", incr_err)
end
end
end
Expand All @@ -73,7 +80,12 @@ local function do_check()
until is_empty(results)

if success then
ngx.shared.stats:set("distributed_last_pulled_at", current_fetch_time)
local set_ok, set_err, set_forcible = ngx.shared.stats:set("distributed_last_pulled_at", current_fetch_time)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'distributed_last_pulled_at' in 'stats' shared dict: ", set_err)
elseif set_forcible then
ngx.log(ngx.WARN, "forcibly set 'distributed_last_pulled_at' in 'stats' shared dict (shared dict may be too small)")
end
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ local function do_check()
end

if success then
ngx.shared.stats:set("distributed_last_pushed_at", current_save_time)
local set_ok, set_err, set_forcible = ngx.shared.stats:set("distributed_last_pushed_at", current_save_time)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'distributed_last_pushed_at' in 'stats' shared dict: ", set_err)
elseif set_forcible then
ngx.log(ngx.WARN, "forcibly set 'distributed_last_pushed_at' in 'stats' shared dict (shared dict may be too small)")
end
end
end

Expand Down
5 changes: 4 additions & 1 deletion src/api-umbrella/proxy/jobs/elasticsearch_setup.lua
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ function _M.create_templates()
end
end

ngx.shared.active_config:set("elasticsearch_templates_created", true)
local set_ok, set_err = ngx.shared.active_config:safe_set("elasticsearch_templates_created", true)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'elasticsearch_templates_created' in 'active_config' shared dict: ", set_err)
end
end

function _M.create_aliases()
Expand Down
20 changes: 15 additions & 5 deletions src/api-umbrella/proxy/jobs/load_api_users.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ local _M = {}

local interval_lock = require "api-umbrella.utils.interval_lock"
local mongo = require "api-umbrella.utils.mongo"
local packed_shared_dict = require "api-umbrella.utils.packed_shared_dict"
local types = require "pl.types"
local utils = require "api-umbrella.proxy.utils"

local get_packed = utils.get_packed
local get_packed = packed_shared_dict.get_packed
local is_empty = types.is_empty
local set_packed = utils.set_packed
local set_packed = packed_shared_dict.set_packed

local api_users = ngx.shared.api_users

Expand Down Expand Up @@ -41,7 +41,12 @@ local function do_check()
for index, result in ipairs(results) do
if skip == 0 and index == 1 then
if result["ts"] and result["ts"]["$timestamp"] then
set_packed(api_users, "distributed_last_fetched_timestamp", result["ts"]["$timestamp"])
local set_ok, set_err, set_forcible = set_packed(api_users, "distributed_last_fetched_timestamp", result["ts"]["$timestamp"])
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'distributed_last_fetched_timestamp' in 'api_users' shared dict: ", set_err)
elseif set_forcible then
ngx.log(ngx.WARN, "forcibly set 'distributed_last_fetched_timestamp' in 'api_users' shared dict (shared dict may be too small)")
end
end
end

Expand All @@ -55,7 +60,12 @@ local function do_check()
until is_empty(results)

if success then
api_users:set("last_fetched_at", current_fetch_time)
local set_ok, set_err, set_forcible = api_users:set("last_fetched_at", current_fetch_time)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'last_fetched_at' in 'api_users' shared dict: ", set_err)
elseif set_forcible then
ngx.log(ngx.WARN, "forcibly set 'last_fetched_at' in 'api_users' shared dict (shared dict may be too small)")
end
end
end

Expand Down
5 changes: 4 additions & 1 deletion src/api-umbrella/proxy/jobs/load_db_config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ local function do_check()
end

if last_fetched_at then
ngx.shared.active_config:set("db_config_last_fetched_at", last_fetched_at)
local set_ok, set_err = ngx.shared.active_config:safe_set("db_config_last_fetched_at", last_fetched_at)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'db_config_last_fetched_at' in 'active_config' shared dict: ", set_err)
end
end
end

Expand Down
7 changes: 6 additions & 1 deletion src/api-umbrella/proxy/log_utils.lua
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,12 @@ function _M.cache_new_city_geocode(data)
-- Only cache the first city location per startup to prevent lots of indexing
-- churn re-indexing the same city.
if not ngx.shared.geocode_city_cache:get(id) then
ngx.shared.geocode_city_cache:set(id, true)
local set_ok, set_err, set_forcible = ngx.shared.geocode_city_cache:set(id, true)
if not set_ok then
ngx.log(ngx.ERR, "failed to set city in 'geocode_city_cache' shared dict: ", set_err)
elseif set_forcible then
ngx.log(ngx.WARN, "forcibly set city in 'geocode_city_cache' shared dict (shared dict may be too small)")
end

-- Perform the actual cache call in a timer because the http library isn't
-- supported directly in the log_by_lua context.
Expand Down
52 changes: 47 additions & 5 deletions src/api-umbrella/proxy/models/active_config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ local escape_regex = require "api-umbrella.utils.escape_regex"
local host_normalize = require "api-umbrella.utils.host_normalize"
local json_encode = require "api-umbrella.utils.json_encode"
local mustache_unescape = require "api-umbrella.utils.mustache_unescape"
local packed_shared_dict = require "api-umbrella.utils.packed_shared_dict"
local plutils = require "pl.utils"
local random_token = require "api-umbrella.utils.random_token"
local startswith = require("pl.stringx").startswith
Expand All @@ -14,7 +15,7 @@ local xpcall_error_handler = require "api-umbrella.utils.xpcall_error_handler"
local append_array = utils.append_array
local cache_computed_settings = utils.cache_computed_settings
local deepcopy = tablex.deepcopy
local set_packed = utils.set_packed
local safe_set_packed = packed_shared_dict.safe_set_packed
local size = tablex.size
local split = plutils.split

Expand Down Expand Up @@ -256,10 +257,51 @@ function _M.set(db_config)
local website_backends = get_combined_website_backends(file_config, db_config)

local active_config = build_active_config(apis, website_backends)
set_packed(ngx.shared.active_config, "packed_data", active_config)
ngx.shared.active_config:set("db_version", db_config["version"])
ngx.shared.active_config:set("file_version", file_config["version"])
ngx.shared.active_config:set("worker_group_setup_complete:" .. WORKER_GROUP_ID, true)
local previous_packed_config = ngx.shared.active_config:get("packed_data")

local set_ok, set_err = safe_set_packed(ngx.shared.active_config, "packed_data", active_config)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'packed_data' in 'active_config' shared dict: ", set_err)

-- If the new config exceeds the amount of available space, `safe_set` will
-- still result in the previous value for `packed_data` getting removed
-- (see https://github.com/openresty/lua-nginx-module/issues/1365). This
-- effectively unpublishes all configuration, which can be disruptive if
-- your new config happens to exceed the allocated space.
--
-- So to more safely handle this scenario, revert `packed_data` back to the
-- previously set value (that presumably fits in memory) so that the
-- previous config remains in place. The new configuration will go
-- unpublished, but this at least keeps the system up in the previous
-- state.
--
-- When this occurs, we will go ahead and set the `db_version` and
-- `file_version` to the new versions, even though this isn't entirely
-- accurate (since we're reverting to the previous config). But by
-- pretending the data was successfully set, this prevents the system from
-- looping indefinitely and trying to set the config over and over to a
-- version that won't fit in memory. In this situation, there's not much
-- else we can do, since the shdict memory needs to be increased.
set_ok, set_err = ngx.shared.active_config:safe_set("packed_data", previous_packed_config)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'packed_data' in 'active_config' shared dict: ", set_err)
end
end

set_ok, set_err = ngx.shared.active_config:safe_set("db_version", db_config["version"])
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'db_version' in 'active_config' shared dict: ", set_err)
end

set_ok, set_err = ngx.shared.active_config:safe_set("file_version", file_config["version"])
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'file_version' in 'active_config' shared dict: ", set_err)
end

set_ok, set_err = ngx.shared.active_config:safe_set("worker_group_setup_complete:" .. WORKER_GROUP_ID, true)
if not set_ok then
ngx.log(ngx.ERR, "failed to set 'worker_group_setup_complete' in 'active_config' shared dict: ", set_err)
end
end

return _M
14 changes: 0 additions & 14 deletions src/api-umbrella/proxy/utils.lua
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
local _M = {}

local cmsgpack = require "cmsgpack"
local json_null = require("cjson").null
local plutils = require "pl.utils"
local stringx = require "pl.stringx"
Expand All @@ -10,11 +9,9 @@ local types = require "pl.types"
local escape = plutils.escape
local gsub = ngx.re.gsub
local is_empty = types.is_empty
local pack = cmsgpack.pack
local split = plutils.split
local strip = stringx.strip
local table_keys = tablex.keys
local unpack = cmsgpack.unpack

-- Append an array to the end of the destination array.
--
Expand Down Expand Up @@ -48,17 +45,6 @@ function _M.base_url()
return base
end

function _M.get_packed(dict, key)
local packed = dict:get(key)
if packed then
return unpack(packed)
end
end

function _M.set_packed(dict, key, value)
return dict:set(key, pack(value))
end

function _M.pick_where_present(dict, keys)
local selected = {}

Expand Down
23 changes: 23 additions & 0 deletions src/api-umbrella/utils/packed_shared_dict.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
local cmsgpack = require "cmsgpack"

local pack = cmsgpack.pack
local unpack = cmsgpack.unpack

local _M = {}

function _M.get_packed(dict, key)
local packed = dict:get(key)
if packed then
return unpack(packed)
end
end

function _M.set_packed(dict, key, value)
return dict:set(key, pack(value))
end

function _M.safe_set_packed(dict, key, value)
return dict:safe_set(key, pack(value))
end

return _M
Loading

0 comments on commit cb5e2c1

Please sign in to comment.