From 52fafcd581677ee579e9e0e6420ee8b18adb1ca4 Mon Sep 17 00:00:00 2001 From: James Sumners Date: Tue, 25 Jun 2024 10:16:25 -0400 Subject: [PATCH] feat: Added support for getting container ids from ECS metadata API --- lib/utilization/docker-info.js | 145 +++++++++++-- test/unit/facts.test.js | 8 +- .../utilization/aws-ecs-api-response.json | 50 +++++ test/unit/utilization/docker-info.test.js | 190 ++++++++++++++++++ 4 files changed, 371 insertions(+), 22 deletions(-) create mode 100644 test/unit/utilization/aws-ecs-api-response.json create mode 100644 test/unit/utilization/docker-info.test.js diff --git a/lib/utilization/docker-info.js b/lib/utilization/docker-info.js index eeb8843c28..e9c39d1da8 100644 --- a/lib/utilization/docker-info.js +++ b/lib/utilization/docker-info.js @@ -5,52 +5,152 @@ 'use strict' -const logger = require('../logger').child({ component: 'docker-info' }) +const fs = require('node:fs') +const http = require('node:http') +const log = require('../logger').child({ component: 'docker-info' }) const common = require('./common') const NAMES = require('../metrics/names') const os = require('os') let vendorInfo = null + const CGROUPS_V1_PATH = '/proc/self/cgroup' const CGROUPS_V2_PATH = '/proc/self/mountinfo' +const BOOT_ID_PROC_FILE = '/proc/sys/kernel/random/boot_id' module.exports.getVendorInfo = fetchDockerVendorInfo module.exports.clearVendorCache = function clearDockerVendorCache() { vendorInfo = null } -module.exports.getBootId = function getBootId(agent, callback) { +module.exports.getBootId = function getBootId(agent, callback, logger = log) { if (!/linux/i.test(os.platform())) { logger.debug('Platform is not a flavor of linux, omitting boot info') return setImmediate(callback, null, null) } - common.readProc('/proc/sys/kernel/random/boot_id', function readProcBootId(err, data) { - if (!data) { - bootIdError() - return callback(null, null) + fs.access(BOOT_ID_PROC_FILE, fs.constants.F_OK, (err) => { + if (err == null) { + // The boot id proc file exists, so use it to get the container id. + return common.readProc(BOOT_ID_PROC_FILE, (_, data, cbAgent = agent) => { + readProcBootId({ data, agent: cbAgent, callback }) + }) } - data = data.trim() - const asciiData = Buffer.from(data, 'ascii').toString() + logger.debug('Container boot id is not available in cgroups info') - if (data !== asciiData) { - bootIdError() + if (hasAwsContainerApi() === false) { + // We don't seem to have a recognized location for getting the container + // identifier. + logger.debug('Container is not in a recognized ECS container, omitting boot info') + recordBootIdError(agent) return callback(null, null) } - if (data.length !== 36) { - bootIdError() - if (data.length > 128) { - data = data.substring(0, 128) + getEcsContainerId({ agent, callback, logger }) + }) +} + +/** + * Queries the AWS ECS metadata API to get the boot id. + * + * @param {object} params Function parameters. + * @param {object} params.agent Newrelic agent instance. + * @param {Function} params.callback Typical error first callback. The second + * parameter is the boot id as a string. + * @param {object} [params.logger] Internal logger instance. + */ +function getEcsContainerId({ agent, callback, logger }) { + const ecsApiUrl = + process.env.ECS_CONTAINER_METADATA_URI_V4 || process.env.ECS_CONTAINER_METADATA_URI + const req = http.request(ecsApiUrl, (res) => { + let body = Buffer.alloc(0) + res.on('data', (chunk) => { + body = Buffer.concat([body, chunk]) + }) + res.on('end', () => { + try { + const json = body.toString('utf8') + const data = JSON.parse(json) + if (data.DockerId == null) { + logger.debug('Failed to find DockerId in response, omitting boot info') + recordBootIdError(agent) + return callback(null, null) + } + callback(null, data.DockerId) + } catch (error) { + logger.debug('Failed to process ECS API response, omitting boot info: ' + error.message) + recordBootIdError(agent) + callback(null, null) } - } + }) + }) - return callback(null, data) + req.on('error', () => { + logger.debug('Failed to query ECS endpoint, omitting boot info') + recordBootIdError(agent) + callback(null, null) }) - function bootIdError() { - agent.metrics.getOrCreateMetric(NAMES.UTILIZATION.BOOT_ID_ERROR).incrementCallCount() + req.end() +} + +/** + * Inspects the running environment to determine if the AWS ECS metadata API + * is available. + * + * @see https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ec2-metadata.html + * + * @returns {boolean} + */ +function hasAwsContainerApi() { + if (process.env.ECS_CONTAINER_METADATA_URI_V4 != null) { + return true } + return process.env.ECS_CONTAINER_METADATA_URI != null +} + +/** + * Increments a supportability metric to indicate that there was an error + * while trying to read the boot id from the system. + * + * @param {object} agent Newrelic agent instance. + */ +function recordBootIdError(agent) { + agent.metrics.getOrCreateMetric(NAMES.UTILIZATION.BOOT_ID_ERROR).incrementCallCount() +} + +/** + * Utility function to parse a Docker boot id from a cgroup proc file. + * + * @param {Buffer} data The information from the proc file. + * @param {object} agent Newrelic agent instance. + * @param {Function} callback Typical error first callback. Second parameter + * is the boot id as a string. + * + * @returns {*} + */ +function readProcBootId({ data, agent, callback }) { + if (!data) { + recordBootIdError(agent) + return callback(null, null) + } + + data = data.trim() + const asciiData = Buffer.from(data, 'ascii').toString() + + if (data !== asciiData) { + recordBootIdError(agent) + return callback(null, null) + } + + if (data.length !== 36) { + recordBootIdError(agent) + if (data.length > 128) { + data = data.substring(0, 128) + } + } + + return callback(null, data) } /** @@ -58,8 +158,9 @@ module.exports.getBootId = function getBootId(agent, callback) { * * @param {object} agent NR instance * @param {Function} callback function to call when done + * @param {object} [logger] internal logger instance */ -function fetchDockerVendorInfo(agent, callback) { +function fetchDockerVendorInfo(agent, callback, logger = log) { if (!agent.config.utilization || !agent.config.utilization.detect_docker) { return callback(null, null) } @@ -93,8 +194,9 @@ function fetchDockerVendorInfo(agent, callback) { * * @param {string} data file contents * @param {Function} callback function to call when done + * @param {object} [logger] internal logger instance */ -function parseCGroupsV2(data, callback) { +function parseCGroupsV2(data, callback, logger = log) { const containerLine = new RegExp('/docker/containers/([0-9a-f]{64})/') const line = containerLine.exec(data) if (line) { @@ -110,8 +212,9 @@ function parseCGroupsV2(data, callback) { * e.g. - `4:cpu:/docker/f37a7e4d17017e7bf774656b19ca4360c6cdc4951c86700a464101d0d9ce97ee` * * @param {Function} callback function to call when done + * @param {object} [logger] internal logger instance */ -function findCGroupsV1(callback) { +function findCGroupsV1(callback, logger = log) { common.readProc(CGROUPS_V1_PATH, function getCGroup(err, data) { if (!data) { logger.debug(`${CGROUPS_V1_PATH} not found, exiting parsing containerId.`) diff --git a/test/unit/facts.test.js b/test/unit/facts.test.js index ee16c48745..fe96f02a74 100644 --- a/test/unit/facts.test.js +++ b/test/unit/facts.test.js @@ -6,6 +6,8 @@ 'use strict' const tap = require('tap') +const fs = require('fs') +const fsAccess = fs.access const os = require('os') const hostname = os.hostname const networkInterfaces = os.networkInterfaces @@ -518,6 +520,7 @@ tap.test('boot_id', (t) => { startingOsPlatform = os.platform os.platform = () => 'linux' + fs.access = (file, mode, cb) => cb(null) }) t.afterEach(() => { @@ -530,6 +533,7 @@ tap.test('boot_id', (t) => { sysInfo._getDockerContainerId = startingDockerInfo common.readProc = startingCommonReadProc os.platform = startingOsPlatform + fs.access = fsAccess startingGetMemory = null startingGetProcessor = null @@ -562,7 +566,9 @@ tap.test('boot_id', (t) => { break case 'input_boot_id': - mockReadProc = (file, cb) => cb(null, testValue) + mockReadProc = (file, cb) => { + cb(null, testValue, agent) + } break // Ignore these keys. diff --git a/test/unit/utilization/aws-ecs-api-response.json b/test/unit/utilization/aws-ecs-api-response.json new file mode 100644 index 0000000000..e8d2a86016 --- /dev/null +++ b/test/unit/utilization/aws-ecs-api-response.json @@ -0,0 +1,50 @@ +{ + "DockerId": "1e1698469422439ea356071e581e8545-2769485393", + "Name": "fargateapp", + "DockerName": "fargateapp", + "Image": "123456789012.dkr.ecr.us-west-2.amazonaws.com/fargatetest:latest", + "ImageID": "sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcd", + "Labels": { + "com.amazonaws.ecs.cluster": "arn:aws:ecs:us-west-2:123456789012:cluster/testcluster", + "com.amazonaws.ecs.container-name": "fargateapp", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:123456789012:task/testcluster/1e1698469422439ea356071e581e8545", + "com.amazonaws.ecs.task-definition-family": "fargatetestapp", + "com.amazonaws.ecs.task-definition-version": "7" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 2 + }, + "CreatedAt": "2024-04-25T17:38:31.073208914Z", + "StartedAt": "2024-04-25T17:38:31.073208914Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/fargatetestapp", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/fargateapp/1e1698469422439ea356071e581e8545" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:123456789012:container/testcluster/1e1698469422439ea356071e581e8545/050256a5-a7f3-461c-a16f-aca4eae37b01", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.10.10.10" + ], + "AttachmentIndex": 0, + "MACAddress": "06:d7:3f:49:1d:a7", + "IPv4SubnetCIDRBlock": "10.10.10.0/20", + "DomainNameServers": [ + "10.10.10.2" + ], + "DomainNameSearchList": [ + "us-west-2.compute.internal" + ], + "PrivateDNSName": "ip-10-10-10-10.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.10.10.1/20" + } + ], + "Snapshotter": "overlayfs" +} diff --git a/test/unit/utilization/docker-info.test.js b/test/unit/utilization/docker-info.test.js new file mode 100644 index 0000000000..a3554b36a6 --- /dev/null +++ b/test/unit/utilization/docker-info.test.js @@ -0,0 +1,190 @@ +/* + * Copyright 2024 New Relic Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +'use strict' + +const tap = require('tap') +const fs = require('node:fs') +const http = require('node:http') +const os = require('node:os') +const helper = require('../../lib/agent_helper') +const standardResponse = require('./aws-ecs-api-response.json') +const { getBootId } = require('../../../lib/utilization/docker-info') + +tap.beforeEach(async (t) => { + t.context.orig = { + fs_access: fs.access, + os_platform: os.platform + } + fs.access = (file, mode, cb) => { + cb(Error('no proc file')) + } + os.platform = () => 'linux' + + t.context.agent = helper.loadMockedAgent() + t.context.agent.config.utilization = { + detect_aws: true, + detect_azure: true, + detect_gcp: true, + detect_docker: true, + detect_kubernetes: true, + detect_pcf: true + } + + t.context.logs = [] + t.context.logger = { + debug(msg) { + t.context.logs.push(msg) + } + } + + t.context.server = await getServer() +}) + +tap.afterEach((t) => { + fs.access = t.context.orig.fs_access + os.platform = t.context.orig.os_platform + + t.context.server.close() + + helper.unloadAgent(t.context.agent) + + delete process.env.ECS_CONTAINER_METADATA_URI + delete process.env.ECS_CONTAINER_METADATA_URI_V4 +}) + +async function getServer() { + const server = http.createServer((req, res) => { + res.writeHead(200, { 'content-type': 'application/json' }) + + switch (req.url) { + case '/json-error': { + res.end(`{"invalid":"json"`) + break + } + + case '/no-id': { + res.end(`{}`) + break + } + + default: { + res.end(JSON.stringify(standardResponse)) + } + } + }) + + await new Promise((resolve) => { + server.listen(0, '127.0.0.1', () => { + resolve() + }) + }) + + return server +} + +tap.test('skips if not in ecs container', (t) => { + const { agent, logs, logger } = t.context + + function callback(err, data) { + t.error(err) + t.strictSame(logs, [ + 'Container boot id is not available in cgroups info', + 'Container is not in a recognized ECS container, omitting boot info' + ]) + t.equal(data, null) + t.equal( + agent.metrics._metrics.unscoped['Supportability/utilization/boot_id/error']?.callCount, + 1 + ) + t.end() + } + + getBootId(agent, callback, logger) +}) + +tap.test('records request error', (t) => { + const { agent, logs, logger, server } = t.context + const info = server.address() + process.env.ECS_CONTAINER_METADATA_URI_V4 = `http://${info.address}:0` + + function callback(err, data) { + t.error(err) + t.strictSame(logs, [ + 'Container boot id is not available in cgroups info', + `Failed to query ECS endpoint, omitting boot info` + ]) + t.equal(data, null) + t.equal( + agent.metrics._metrics.unscoped['Supportability/utilization/boot_id/error']?.callCount, + 1 + ) + t.end() + } + + getBootId(agent, callback, logger) +}) + +tap.test('records json parsing error', (t) => { + const { agent, logs, logger, server } = t.context + const info = server.address() + process.env.ECS_CONTAINER_METADATA_URI_V4 = `http://${info.address}:${info.port}/json-error` + + function callback(err, data) { + t.error(err) + t.match(logs, [ + 'Container boot id is not available in cgroups info', + // Node 16 has a different format for JSON parsing errors: + /Failed to process ECS API response, omitting boot info: (Expected|Unexpected)/ + ]) + t.equal(data, null) + t.equal( + agent.metrics._metrics.unscoped['Supportability/utilization/boot_id/error']?.callCount, + 1 + ) + t.end() + } + + getBootId(agent, callback, logger) +}) + +tap.test('records error for no id in response', (t) => { + const { agent, logs, logger, server } = t.context + const info = server.address() + process.env.ECS_CONTAINER_METADATA_URI_V4 = `http://${info.address}:${info.port}/no-id` + + function callback(err, data) { + t.error(err) + t.strictSame(logs, [ + 'Container boot id is not available in cgroups info', + 'Failed to find DockerId in response, omitting boot info' + ]) + t.equal(data, null) + t.equal( + agent.metrics._metrics.unscoped['Supportability/utilization/boot_id/error']?.callCount, + 1 + ) + t.end() + } + + getBootId(agent, callback, logger) +}) + +tap.test('records found id', (t) => { + const { agent, logs, logger, server } = t.context + const info = server.address() + // Cover the non-V4 case: + process.env.ECS_CONTAINER_METADATA_URI = `http://${info.address}:${info.port}/success` + + function callback(err, data) { + t.error(err) + t.strictSame(logs, ['Container boot id is not available in cgroups info']) + t.equal(data, '1e1698469422439ea356071e581e8545-2769485393') + t.notOk(agent.metrics._metrics.unscoped['Supportability/utilization/boot_id/error']?.callCount) + t.end() + } + + getBootId(agent, callback, logger) +})