Skip to content

Commit

Permalink
Improve resiliency of proxy during MongoDB replicaset changes.
Browse files Browse the repository at this point in the history
When connecting to a MongoDB server that's part of a replicaset, these changes help
improve the failover handling during replicaset changes (either planned
or unplanned).

The default read preference is now "primaryPreferred," which allows
connections to secondaries during replicaset changes when a primary has
not been elected yet. Mora was previously lacking this read preference
functionality, so a pull request to Mora has been submitted to provide
support.

We're also handling more potential edge-cases that might crop up during
replicaset changes, and retrying the queries in those cases. This better
ensures no connections are dropped, even in the event of unexpected
replicaset changes.

The updates to the testing suite helped uncover these failover
edge-cases, so these improvements should also resolve the possibility of
sporadic test failures.
  • Loading branch information
GUI committed Dec 2, 2016
1 parent 7d63797 commit 8990348
Show file tree
Hide file tree
Showing 14 changed files with 126 additions and 82 deletions.
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,8 @@ gem "lazyhash", "~> 0.1.1"
# Generating fake strings and data.
gem "faker", "~> 1.6.6"

# Concurrency helpers.
gem "concurrent-ruby", "~> 1.0.2"

# Debug printing
gem "awesome_print", "~> 1.7.0"
3 changes: 2 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ DEPENDENCIES
capybara (~> 2.10.1)
capybara-screenshot (~> 1.0.14)
childprocess (~> 0.5.9)
concurrent-ruby (~> 1.0.2)
database_cleaner (~> 1.5.3)
elasticsearch (~> 2.0.0)
elasticsearch-persistence (~> 0.1.9)
Expand All @@ -168,4 +169,4 @@ DEPENDENCIES
typhoeus (~> 1.1.0)

BUNDLED WITH
1.13.1
1.13.6
8 changes: 5 additions & 3 deletions build/cmake/mora.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,17 @@ set(GLIDE_SOURCE_DIR ${SOURCE_DIR})
ExternalProject_Add(
mora
DEPENDS glide
URL https://github.com/emicklei/mora/archive/${MORA_VERSION}.tar.gz
# Use fork for read preference support:
# https://github.com/emicklei/mora/pull/44
URL https://github.com/GUI/mora/archive/${MORA_VERSION}.tar.gz
URL_HASH MD5=${MORA_HASH}
SOURCE_DIR ${WORK_DIR}/gocode/src/github.com/emicklei/mora
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND cp ${CMAKE_SOURCE_DIR}/build/mora/glide.yaml <SOURCE_DIR>/glide.yaml
COMMAND cp ${CMAKE_SOURCE_DIR}/build/mora/glide.lock <SOURCE_DIR>/glide.lock
COMMAND env PATH=${GOLANG_SOURCE_DIR}/bin:${GLIDE_SOURCE_DIR}:${WORK_DIR}/gocode/bin:$ENV{PATH} GOPATH=${WORK_DIR}/gocode GOROOT=${GOLANG_SOURCE_DIR} GO15VENDOREXPERIMENT=1 glide install
COMMAND env PATH=${GOLANG_SOURCE_DIR}/bin:${GLIDE_SOURCE_DIR}:${WORK_DIR}/gocode/bin:$ENV{PATH} GOPATH=${WORK_DIR}/gocode GOROOT=${GOLANG_SOURCE_DIR} GO15VENDOREXPERIMENT=1 go install
COMMAND env PATH=${GOLANG_SOURCE_DIR}/bin:${GLIDE_SOURCE_DIR}:${WORK_DIR}/gocode/bin:$ENV{PATH} GOPATH=${WORK_DIR}/gocode GOROOT=${GOLANG_SOURCE_DIR} glide install
COMMAND env PATH=${GOLANG_SOURCE_DIR}/bin:${GLIDE_SOURCE_DIR}:${WORK_DIR}/gocode/bin:$ENV{PATH} GOPATH=${WORK_DIR}/gocode GOROOT=${GOLANG_SOURCE_DIR} go install
INSTALL_COMMAND install -D -m 755 ${WORK_DIR}/gocode/bin/mora ${STAGE_EMBEDDED_DIR}/bin/mora
)
ExternalProject_Add_Step(
Expand Down
4 changes: 2 additions & 2 deletions build/cmake/versions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ set(MAILHOG_HASH 6602fd7f69276e7efba310362e958133)
set(MONGO_ORCHESTRATION_VERSION 0.6.7)
set(MONGODB_VERSION 3.2.11)
set(MONGODB_HASH 9916a076bd2e2fa8e8fbad94bb083fae)
set(MORA_VERSION 4cae0b86a440356cc3b669fb76343ac514c99655)
set(MORA_HASH 6764886ca9b8c5302e93597c4500bfd3)
set(MORA_VERSION 02c69fb82839e4fc2c8415c763f1934b7cf7dd4f)
set(MORA_HASH 8030e2869ac1e9b6ef90c770cd4e946f)
set(NGX_DYUPS_VERSION d4b3e053dee10e2879882eb4c346ac7d534e2d14)
set(NGX_DYUPS_HASH bdf4408599602afa38365a426e126d21)
set(NGX_TXID_VERSION f1c197cb9c42e364a87fbb28d5508e486592ca42)
Expand Down
18 changes: 7 additions & 11 deletions build/mora/glide.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions build/mora/glide.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package: .
package: github.com/emicklei/mora
import:
- package: github.com/compose/mejson
- package: github.com/emicklei/go-restful
Expand All @@ -7,15 +7,9 @@ import:
version: ~1.1.3
subpackages:
- swagger
- package: github.com/emicklei/mora
subpackages:
- api/documents
- api/response
- api/statistics
- session
- package: github.com/magiconair/properties
version: ~1.7.0
- package: gopkg.in/mgo.v2
version: r2016.02.04
version: r2016.08.01
subpackages:
- bson
4 changes: 3 additions & 1 deletion build/scripts/outdated
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ repos = {
:constraint => "~> 3.2.9",
},
"mora" => {
:git => "https://github.com/emicklei/mora.git",
# Use fork for read preference support:
# https://github.com/emicklei/mora/pull/44
:git => "https://github.com/GUI/mora.git",
:git_ref => "master",
},
"ngx_dyups" => {
Expand Down
9 changes: 1 addition & 8 deletions config/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,7 @@ dns_resolver:
retries: 3
mongodb:
url: "mongodb://127.0.0.1:14001/api_umbrella"
options:
server:
auto_reconnect: true
socketOptions:
keepAlive: 500
replset:
socketOptions:
keepAlive: 500
read_preference: primaryPreferred
embedded_server_config:
processManagement:
fork: false
Expand Down
37 changes: 28 additions & 9 deletions src/api-umbrella/utils/mongo.lua
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
local cjson = require "cjson"
local http = require "resty.http"
local stringx = require "pl.stringx"
local types = require "pl.types"

local is_empty = types.is_empty
local startswith = stringx.startswith

local _M = {}

Expand Down Expand Up @@ -68,22 +70,39 @@ local function perform_query(path, query_options, http_options)

local response, err = try_query(path, http_options)

-- If we get an "EOF" error from Mora, this means our query occurred during
-- the middle of a server or replicaset change. In this case, retry the
-- request a couple more times.
-- If we get certain types of errors from Mora, this means our query occurred
-- during the middle of a server or replicaset change. In this case, retry
-- the request a few more times.
--
-- This should be less likely in mora since
-- https://github.com/emicklei/mora/pull/29, but it's still possible for this
-- to crop up if the socket gets closed sometime between the request starting
-- and the query actually executing. After more research, this seems to be
-- and the query actually executing. This can also happen in case of
-- unexpected mongod shutdowns. After more research, this seems to be
-- expected mgo behavior, and it's up to the app to handle these types of
-- errors. I'm not entirely sure whether we should try to address the issue
-- in mora itself, but in the meantime, we'll retry here.
if err and err == "mongodb error: EOF" then
response, err = try_query(path, http_options)
if err and err == "mongodb error: EOF" then
ngx.sleep(0.5)
response, err = try_query(path, http_options)
if err then
-- Loop to retry a few times until no error occurs or we give up, since we
-- don't want to wait forever.
local retries = 0
while err and retries < 5 do
if err == "mongodb error: EOF"
or err == "mongodb error: node is recovering"
or err == "mongodb error: interrupted at shutdown"
or err == "mongodb error: Closed explicitly"
or startswith(err, "mongodb error: read tcp")
then
-- Retry immediately, then sleep between further retries.
retries = retries + 1
if retries > 1 then
ngx.sleep(0.5)
end

response, err = try_query(path, http_options)
else
break
end
end
end

Expand Down
22 changes: 22 additions & 0 deletions src/api-umbrella/web-app/config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,28 @@ class Application < Rails::Application
require "js_locale_helper"
end

# Instead of loading from a mongoid.yml file, load the Mongoid config in
# code, where it's easier to merge settings from our API Umbrella
# configuration.
initializer "mongoid-config", :after => "mongoid.load-config" do
  mongodb_settings = ApiUmbrellaConfig[:mongodb]

  # Convert the configured read preference string (for example,
  # "primaryPreferred") into the underscored symbol handed to Mongoid as the
  # read mode (for example, :primary_preferred).
  read_mode = mongodb_settings[:read_preference].underscore.to_sym

  default_client = {
    :uri => mongodb_settings[:url],
    :options => {
      :read => {
        :mode => read_mode,
      },
    },
  }

  # Disconnect and clear any previously configured clients before loading
  # the configuration built above.
  Mongoid::Clients.disconnect
  Mongoid::Clients.clear
  Mongoid.load_configuration({
    :clients => {
      :default => default_client,
    },
  })
end

# Settings in config/environments/* take precedence over those specified here.
# Application configuration should go into files in config/initializers
# -- all .rb files in that directory are automatically loaded.
Expand Down
19 changes: 0 additions & 19 deletions src/api-umbrella/web-app/config/mongoid.yml

This file was deleted.

1 change: 1 addition & 0 deletions templates/etc/mora.properties.mustache
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
http.server.host={{mora.host}}
http.server.port={{mora.port}}
mongod.api_umbrella.uri={{mongodb.url}}
mongod.api_umbrella.mode={{mongodb.read_preference}}
mongod.api_umbrella.timeout={{mora.timeout}}
64 changes: 47 additions & 17 deletions test/proxy/test_mongodb_replica_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ def setup
Mongoid::Clients.disconnect
Mongoid::Clients.clear
Mongoid.load_configuration({
"clients" => {
"default" => {
"uri" => mongodb_url,
:clients => {
:default => {
:uri => mongodb_url,
},
},
})
Expand Down Expand Up @@ -71,9 +71,9 @@ def after_all
Mongoid::Clients.disconnect
Mongoid::Clients.clear
Mongoid.load_configuration({
"clients" => {
"default" => {
"uri" => $config["mongodb"]["url"],
:clients => {
:default => {
:uri => $config["mongodb"]["url"],
},
},
})
Expand Down Expand Up @@ -101,15 +101,29 @@ def test_no_dropped_connections_during_replica_set_elections
}))
assert_response_code(403, response)

# Pre-create an array of unique API keys to be used during tests.
#
# If more than 100 unique API keys are needed during the test, the number
# we create may need to be increased, but currently we're using far less
# than this on average.
#
# We do this before the tests start, so we're not dealing with the
# edge-case of inserts being attempted right as a primary server changes or
# shuts down. We're more interested in testing the read-only
# functionality of the proxy when the primary changes (eg, that the proxy
# can continue querying for valid API keys).
@users = Concurrent::Array.new(FactoryGirl.create_list(:api_user, 100, {
:settings => {
:rate_limit_mode => "unlimited",
},
}))

# Perform parallel requests constantly in the background of this test.
# This ensures that no connections are dropped during any point of the
# replica set changes we'll make later on.
request_thread = Thread.new do
user = FactoryGirl.create(:api_user, {
:settings => {
:rate_limit_mode => "unlimited",
},
})
# Pop a new unique API key off to use for this set of tests.
user = @users.shift

loop do
hydra = Typhoeus::Hydra.new(:max_concurrency => 5)
Expand Down Expand Up @@ -165,6 +179,7 @@ def test_no_dropped_connections_during_replica_set_elections
mongo_orchestration(:patch, "/v1/replica_sets/test-cluster/members/#{@initial_primary_replica_id}", {
:rsParams => { :priority => 99 },
})
wait_for_num_tests(100)

request_thread.exit
end
Expand Down Expand Up @@ -200,11 +215,8 @@ def wait_for_primary_change
# just slow down during replica set changes (in which case the background
# requests might not end up performing many requests).
def wait_for_num_tests(count)
user = FactoryGirl.create(:api_user, {
:settings => {
:rate_limit_mode => "unlimited",
},
})
# Pop a new unique API key off to use for this set of tests.
user = @users.shift

hydra = Typhoeus::Hydra.new(:max_concurrency => 5)
count.times do
Expand All @@ -219,7 +231,12 @@ def wait_for_num_tests(count)
hydra.run
end

# Raised when a mongo-orchestration request returns a non-200 response, so
# the request can be rescued and retried.
class MongoOrchestrationError < StandardError; end

def mongo_orchestration(http_method, path, data = {})
retries ||= 0

http_opts = http_options.merge(:method => http_method)
if(data.present?)
http_opts.deep_merge!({
Expand All @@ -229,10 +246,23 @@ def mongo_orchestration(http_method, path, data = {})
end

response = Typhoeus::Request.new("http://127.0.0.1:13089#{path}", http_opts).run
assert_response_code(200, response)
if(response.code != 200)
raise MongoOrchestrationError
end
assert_equal("application/json", response.headers["content-type"])

MultiJson.load(response.body)
rescue MongoOrchestrationError
# If the request to mongo-orchestration failed, retry a few times. This can
# happen in certain cases when mongo-orchestration's Python client also
# gets confused by the ongoing replicaset changes.
retries += 1
if(retries <= 4)
sleep 0.5
retry
else
assert_response_code(200, response)
end
end

def setup_mongo_orchestration
Expand Down
6 changes: 3 additions & 3 deletions test/support/api_umbrella_test_helpers/setup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ def setup_server
self.setup_mutex.synchronize do
unless self.setup_complete
Mongoid.load_configuration({
"clients" => {
"default" => {
"uri" => $config["mongodb"]["url"],
:clients => {
:default => {
:uri => $config["mongodb"]["url"],
},
},
})
Expand Down

0 comments on commit 8990348

Please sign in to comment.