From f8495f11bb6a244deaaff6d1be6e683359903a00 Mon Sep 17 00:00:00 2001 From: Nick Muerdter Date: Wed, 14 Sep 2016 17:45:25 -0600 Subject: [PATCH] Fix race condition on seeding database on startup. Improve startup logs. If nginx got started before MongoDB/Mora was fully responsive, there was a possibility seeding the initial database data would fail. This improves the seeding process by waiting for MongoDB to be fully up before proceeding with the seeding. Seeding will be re-attempted until it succeeds. Some related improvements were made to the similar Elasticsearch setup process. We were already waiting there to ensure Elasticsearch was up before running setup, but if Elasticsearch timed out, then setup wasn't re-attempted for another 1 hour. This improves things so that setup is reattempted on startup until it succeeds. This also quiets the startup logs by only logging events when MongoDB or Elasticsearch aren't ready after the 60 second timeout period (rather than on each connection attempt). --- .../proxy/jobs/elasticsearch_setup.lua | 24 ++++++--- .../proxy/startup/seed_database.lua | 49 +++++++++++++++---- src/api-umbrella/utils/mongo.lua | 6 +++ 3 files changed, 64 insertions(+), 15 deletions(-) diff --git a/src/api-umbrella/proxy/jobs/elasticsearch_setup.lua b/src/api-umbrella/proxy/jobs/elasticsearch_setup.lua index 254cba8d1..6015ba578 100644 --- a/src/api-umbrella/proxy/jobs/elasticsearch_setup.lua +++ b/src/api-umbrella/proxy/jobs/elasticsearch_setup.lua @@ -17,7 +17,7 @@ local function wait_for_elasticsearch() repeat local res, err = httpc:request_uri(elasticsearch_host .. 
"/_cluster/health") if err then - ngx.log(ngx.ERR, "failed to fetch cluster health from elasticsearch: ", err) + ngx.log(ngx.NOTICE, "failed to fetch cluster health from elasticsearch (this is expected if elasticsearch is starting up at the same time): ", err) elseif res.body then local elasticsearch_health = cjson.decode(res.body) if elasticsearch_health["status"] == "yellow" or elasticsearch_health["status"] == "green" then @@ -30,6 +30,12 @@ local function wait_for_elasticsearch() wait_time = wait_time + sleep_time end until elasticsearch_alive or wait_time > max_time + + if elasticsearch_alive then + return true, nil + else + return false, "elasticsearch was not ready within " .. max_time .."s" + end end local function create_templates() @@ -101,14 +107,20 @@ local function create_aliases() end end -local function do_check() - wait_for_elasticsearch() - create_templates() - create_aliases() +local function setup() + local _, err = wait_for_elasticsearch() + if not err then + create_templates() + create_aliases() + else + ngx.log(ngx.ERR, "timed out waiting for elasticsearch before setup, rerunning...") + ngx.sleep(5) + return setup() + end end function _M.spawn() - interval_lock.repeat_with_mutex('elasticsearch_index_setup', delay, do_check) + interval_lock.repeat_with_mutex('elasticsearch_index_setup', delay, setup) end return _M diff --git a/src/api-umbrella/proxy/startup/seed_database.lua b/src/api-umbrella/proxy/startup/seed_database.lua index 3f4bb4ffb..30b6867e1 100644 --- a/src/api-umbrella/proxy/startup/seed_database.lua +++ b/src/api-umbrella/proxy/startup/seed_database.lua @@ -1,11 +1,37 @@ local deep_merge_overwrite_arrays = require "api-umbrella.utils.deep_merge_overwrite_arrays" -local lock = require "resty.lock" +local interval_lock = require "api-umbrella.utils.interval_lock" local mongo = require "api-umbrella.utils.mongo" local random_token = require "api-umbrella.utils.random_token" local uuid = require "resty.uuid" local nowMongoDate = { ["$date"] 
= { ["$numberLong"] = tostring(os.time() * 1000) } } +local function wait_for_mongodb() + local mongodb_alive = false + local wait_time = 0 + local sleep_time = 0.5 + local max_time = 14 + repeat + local _, err = mongo.collections() + if err then + ngx.log(ngx.NOTICE, "failed to establish connection to mongodb (this is expected if mongodb is starting up at the same time): ", err) + else + mongodb_alive = true + end + + if not mongodb_alive then + ngx.sleep(sleep_time) + wait_time = wait_time + sleep_time + end + until mongodb_alive or wait_time > max_time + + if mongodb_alive then + return true, nil + else + return false, "mongodb was not ready within " .. max_time .."s" + end +end + local function seed_api_keys() local keys = { -- static.site.ajax@internal.apiumbrella @@ -208,19 +234,24 @@ local function seed_admin_permissions() end local function seed() - local seed_lock = lock:new("locks", { ["timeout"] = 0 }) - local _, lock_err = seed_lock:lock("seed_database") - if lock_err then - return + local _, err = wait_for_mongodb() + if not err then + seed_api_keys() + seed_initial_superusers() + seed_admin_permissions() + else + ngx.log(ngx.ERR, "timed out waiting for mongodb before seeding, rerunning...") + ngx.sleep(5) + return seed() end +end - seed_api_keys() - seed_initial_superusers() - seed_admin_permissions() +local function seed_once() + interval_lock.mutex_exec("seed_database", seed) end return function() - local ok, err = ngx.timer.at(0, seed) + local ok, err = ngx.timer.at(0, seed_once) if not ok then ngx.log(ngx.ERR, "failed to create timer: ", err) return diff --git a/src/api-umbrella/utils/mongo.lua b/src/api-umbrella/utils/mongo.lua index f3d26fceb..04f225507 100644 --- a/src/api-umbrella/utils/mongo.lua +++ b/src/api-umbrella/utils/mongo.lua @@ -120,6 +120,12 @@ function _M.find(collection, query_options) return results, err end +function _M.collections() + local collection = "" + local query_options = {} + return _M.find(collection, query_options) 
+end + function _M.first(collection, query_options) if not query_options then query_options = {}