From 8ca3ee8a89950e80f8ba7101dac148e4f280a329 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Fri, 9 Apr 2021 17:02:43 -0700 Subject: [PATCH 01/12] fix: hostname instead of IP address for span URL --- lib/instrumentation/http-shared.js | 37 ++- package.json | 1 - test/instrumentation/modules/http/outgoing.js | 18 +- .../modules/http/request-to-url.js | 285 ++++++++++++++++++ 4 files changed, 324 insertions(+), 17 deletions(-) create mode 100644 test/instrumentation/modules/http/request-to-url.js diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index 8c75a32cca..b8117ef3e7 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -3,7 +3,6 @@ var url = require('url') var endOfStream = require('end-of-stream') -var httpRequestToUrl = require('http-request-to-url') var { parseUrl } = require('../parsers') var { getHTTPDestination } = require('./context') @@ -191,13 +190,11 @@ exports.traceOutgoingRequest = function (agent, moduleName, method) { return emit.apply(req, arguments) } - let url, statusCode - httpRequestToUrl(req).then(_url => { - url = _url - }).catch(() => { + const url = getUrlFromRequestAndOptions(req, options) + if (!url) { agent.logger.warn('unable to identify http.ClientRequest url %o', { id: id }) - }) - + } + let statusCode return req // In case the request is ended prematurely @@ -260,3 +257,29 @@ function isAWSSigned (opts) { const auth = opts.headers && (opts.headers.Authorization || opts.headers.authorization) return typeof auth === 'string' ? auth.startsWith('AWS4-') : false } + +// creates a sanitized URL suitable for the span's HTTP context +// +// This function recontructs a URL using the request object's +// properties where it can, and falling back to the user provided +// options where it can not. +// +// @param {req} ClientRequest +// @param {options} object +// @return string +function getUrlFromRequestAndOptions (req, options) { + if (!req) { + return false + } + options = options || {} + const port = options.port ? `:${options.port}` : '' + let url + try { + url = new URL(`${req.protocol}//${req.host}${port}${req.path}`) + } catch (e) { + return false + } + return `${url.protocol}//${url.host}${url.pathname}${url.search}` +} + +exports.getUrlFromRequestAndOptions = getUrlFromRequestAndOptions diff --git a/package.json b/package.json index 5207b42ae9..847b42e857 100644 --- a/package.json +++ b/package.json @@ -93,7 +93,6 @@ "escape-string-regexp": "^4.0.0", "fast-safe-stringify": "^2.0.7", "http-headers": "^3.0.2", - "http-request-to-url": "^1.0.0", "is-native": "^1.0.1", "measured-reporting": "^1.51.1", "monitor-event-loop-delay": "^1.0.0", diff --git a/test/instrumentation/modules/http/outgoing.js b/test/instrumentation/modules/http/outgoing.js index 1ddc0da2d3..b2171bc5fd 100644 --- a/test/instrumentation/modules/http/outgoing.js +++ b/test/instrumentation/modules/http/outgoing.js @@ -172,15 +172,15 @@ function echoTest (type, opts, handler) { t.deepEqual(data.spans[0].context.http, { method: 'GET', status_code: 200, - url: `${type}://127.0.0.1:${port}/` + url: `${type}://localhost:${port}/` }) t.deepEqual(data.spans[0].context.destination, { service: { - name: `${type}://127.0.0.1:${port}`, - resource: `127.0.0.1:${port}`, + name: `${type}://localhost:${port}`, + resource: `localhost:${port}`, type: data.spans[0].type }, - address: '127.0.0.1', + address: 'localhost', port: Number(port) }) t.end() @@ -224,7 +224,7 @@ function abortTest (type, handler) { const httpContext = { method: 'GET', status_code: undefined, - url: undefined + url: `http://localhost:${port}/` } resetAgent({}, data => { @@ -241,11 +241,11 @@ function abortTest (type, handler) { if (httpContext.url) { t.deepEqual(data.spans[0].context.destination, { service: { - name: `${type}://127.0.0.1:${port}`, - resource: `127.0.0.1:${port}`, + name: `${type}://localhost:${port}`, + resource: `localhost:${port}`, type: data.spans[0].type }, - address: '127.0.0.1', + address: 'localhost', port: Number(port) }) } @@ -262,7 +262,7 @@ function abortTest (type, handler) { req.on('response', () => { httpContext.status_code = 200 - httpContext.url = `${type}://127.0.0.1:${port}/` + httpContext.url = `${type}://localhost:${port}/` }) // NOTE: Don't use an arrow function here diff --git a/test/instrumentation/modules/http/request-to-url.js b/test/instrumentation/modules/http/request-to-url.js new file mode 100644 index 0000000000..b8117ef3e7 --- /dev/null +++ b/test/instrumentation/modules/http/request-to-url.js @@ -0,0 +1,285 @@ +'use strict' + +var url = require('url') + +var endOfStream = require('end-of-stream') + +var { parseUrl } = require('../parsers') +var { getHTTPDestination } = require('./context') + +const transactionForResponse = new WeakMap() +exports.transactionForResponse = transactionForResponse + +exports.instrumentRequest = function (agent, moduleName) { + var ins = agent._instrumentation + return function (orig) { + return function (event, req, res) { + if (event === 'request') { + agent.logger.debug('intercepted request event call to %s.Server.prototype.emit for %s', moduleName, req.url) + + if (isRequestBlacklisted(agent, req)) { + agent.logger.debug('ignoring blacklisted request to %s', req.url) + // don't leak previous transaction + agent._instrumentation.currentTransaction = null + } else { + var traceparent = req.headers['elastic-apm-traceparent'] || req.headers.traceparent + var tracestate = req.headers.tracestate + var trans = agent.startTransaction(null, null, { + childOf: traceparent, + tracestate: tracestate + }) + trans.type = 'request' + trans.req = req + trans.res = res + + transactionForResponse.set(res, trans) + + ins.bindEmitter(req) + ins.bindEmitter(res) + + endOfStream(res, function (err) { + if (trans.ended) return + if (!err) return trans.end() + + if (agent._conf.errorOnAbortedRequests) { + var duration = trans._timer.elapsed() + if (duration > (agent._conf.abortedErrorThreshold * 1000)) { + agent.captureError('Socket closed with active HTTP request (>' + agent._conf.abortedErrorThreshold + ' sec)', { + request: req, + extra: { abortTime: duration } + }) + } + } + + // Handle case where res.end is called after an error occurred on the + // stream (e.g. if the underlying socket was prematurely closed) + const end = res.end + res.end = function () { + const result = end.apply(this, arguments) + trans.end() + return result + } + }) + } + } + + return orig.apply(this, arguments) + } + } +} + +function isRequestBlacklisted (agent, req) { + var i + + for (i = 0; i < agent._conf.ignoreUrlStr.length; i++) { + if (agent._conf.ignoreUrlStr[i] === req.url) return true + } + for (i = 0; i < agent._conf.ignoreUrlRegExp.length; i++) { + if (agent._conf.ignoreUrlRegExp[i].test(req.url)) return true + } + for (i = 0; i < agent._conf.transactionIgnoreUrlRegExp.length; i++) { + if (agent._conf.transactionIgnoreUrlRegExp[i].test(req.url)) return true + } + + var ua = req.headers['user-agent'] + if (!ua) return false + + for (i = 0; i < agent._conf.ignoreUserAgentStr.length; i++) { + if (ua.indexOf(agent._conf.ignoreUserAgentStr[i]) === 0) return true + } + for (i = 0; i < agent._conf.ignoreUserAgentRegExp.length; i++) { + if (agent._conf.ignoreUserAgentRegExp[i].test(ua)) return true + } + + return false +} + +function formatURL (item) { + return { + href: item.href, + pathname: item.pathname, + path: item.pathname + (item.search || ''), + protocol: item.protocol, + host: item.host, + port: item.port, + hostname: item.hostname, + hash: item.hash, + search: item.search + } +} + +// NOTE: This will also stringify and parse URL instances +// to a format which can be mixed into the options object. +function ensureUrl (v) { + if (typeof v === 'string') { + return formatURL(parseUrl(v)) + } else if (url.URL && v instanceof url.URL) { + return formatURL(v) + } else { + return v + } +} + +function getSafeHost (res) { + return res.getHeader ? res.getHeader('Host') : res._headers.host +} + +exports.traceOutgoingRequest = function (agent, moduleName, method) { + var ins = agent._instrumentation + + return function (orig) { + return function (...args) { + // TODO: See if we can delay the creation of span until the `response` + // event is fired, while still having it have the correct stack trace + var span = agent.startSpan(null, 'external', moduleName, 'http') + var id = span && span.transaction.id + + agent.logger.debug('intercepted call to %s.%s %o', moduleName, method, { id: id }) + + var options = {} + var newArgs = [options] + for (const arg of args) { + if (typeof arg === 'function') { + newArgs.push(arg) + } else { + Object.assign(options, ensureUrl(arg)) + } + } + + if (!options.headers) options.headers = {} + + // Attempt to use the span context as a traceparent header. + // If the transaction is unsampled the span will not exist, + // however a traceparent header must still be propagated + // to indicate requested services should not be sampled. + // Use the transaction context as the parent, in this case. + var parent = span || agent.currentTransaction + if (parent && parent._context && shouldPropagateTraceContext(options)) { + const headerValue = parent._context.toTraceParentString() + const traceStateValue = parent._context.toTraceStateString() + + options.headers.traceparent = headerValue + options.headers.tracestate = traceStateValue + if (agent._conf.useElasticTraceparentHeader) { + options.headers['elastic-apm-traceparent'] = headerValue + } + } + + var req = orig.apply(this, newArgs) + if (!span) return req + + if (getSafeHost(req) === agent._conf.serverHost) { + agent.logger.debug('ignore %s request to intake API %o', moduleName, { id: id }) + return req + } else { + var protocol = req.agent && req.agent.protocol + agent.logger.debug('request details: %o', { protocol: protocol, host: getSafeHost(req), id: id }) + } + + ins.bindEmitter(req) + + span.name = req.method + ' ' + getSafeHost(req) + parseUrl(req.path).pathname + + // TODO: Research if it's possible to add this to the prototype instead. + // Or if it's somehow preferable to listen for when a `response` listener + // is added instead of when `response` is emitted. + const emit = req.emit + req.emit = function (type, res) { + if (type === 'response') onresponse(res) + if (type === 'abort') onAbort(type) + return emit.apply(req, arguments) + } + + const url = getUrlFromRequestAndOptions(req, options) + if (!url) { + agent.logger.warn('unable to identify http.ClientRequest url %o', { id: id }) + } + let statusCode + return req + + // In case the request is ended prematurely + function onAbort (type) { + if (span.ended) return + agent.logger.debug('intercepted http.ClientRequest abort event %o', { id: id }) + + onEnd() + } + + function onEnd () { + span.setHttpContext({ + method: req.method, + status_code: statusCode, + url + }) + + // Add destination info only when socket conn is established + if (url) { + // The `getHTTPDestination` function might throw in case an + // invalid URL is given to the `URL()` function. Until we can + // be 100% sure this doesn't happen, we better catch it here. + // For details, see: + // https://github.com/elastic/apm-agent-nodejs/issues/1769 + try { + span.setDestinationContext(getHTTPDestination(url, span.type)) + } catch (e) { + agent.logger.error('Could not set destination context: %s', e.message) + } + } + + span._setOutcomeFromHttpStatusCode(statusCode) + span.end() + } + + function onresponse (res) { + // Work around async_hooks bug in Node.js 12.0 - 12.2 (https://github.com/nodejs/node/pull/27477) + ins._recoverTransaction(span.transaction) + + agent.logger.debug('intercepted http.ClientRequest response event %o', { id: id }) + ins.bindEmitter(res) + + statusCode = res.statusCode + + res.prependListener('end', function () { + agent.logger.debug('intercepted http.IncomingMessage end event %o', { id: id }) + + onEnd() + }) + } + } + } +} + +function shouldPropagateTraceContext (opts) { + return !isAWSSigned(opts) +} + +function isAWSSigned (opts) { + const auth = opts.headers && (opts.headers.Authorization || opts.headers.authorization) + return typeof auth === 'string' ? auth.startsWith('AWS4-') : false +} + +// creates a sanitized URL suitable for the span's HTTP context +// +// This function recontructs a URL using the request object's +// properties where it can, and falling back to the user provided +// options where it can not. +// +// @param {req} ClientRequest +// @param {options} object +// @return string +function getUrlFromRequestAndOptions (req, options) { + if (!req) { + return false + } + options = options || {} + const port = options.port ? `:${options.port}` : '' + let url + try { + url = new URL(`${req.protocol}//${req.host}${port}${req.path}`) + } catch (e) { + return false + } + return `${url.protocol}//${url.host}${url.pathname}${url.search}` +} + +exports.getUrlFromRequestAndOptions = getUrlFromRequestAndOptions From 6e5e8f33017ebcfcd41df9e49abeb4b2e65fd658 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Fri, 9 Apr 2021 17:06:59 -0700 Subject: [PATCH 02/12] fix: right file --- .../modules/http/request-to-url.js | 385 +++++------------- 1 file changed, 110 insertions(+), 275 deletions(-) diff --git a/test/instrumentation/modules/http/request-to-url.js b/test/instrumentation/modules/http/request-to-url.js index b8117ef3e7..700db9cfcc 100644 --- a/test/instrumentation/modules/http/request-to-url.js +++ b/test/instrumentation/modules/http/request-to-url.js @@ -1,285 +1,120 @@ -'use strict' +const tape = require('tape') +const http = require('http') +const { getUrlFromRequestAndOptions } = require('../../../../lib/instrumentation/http-shared') -var url = require('url') - -var endOfStream = require('end-of-stream') - -var { parseUrl } = require('../parsers') -var { getHTTPDestination } = require('./context') - -const transactionForResponse = new WeakMap() -exports.transactionForResponse = transactionForResponse - -exports.instrumentRequest = function (agent, moduleName) { - var ins = agent._instrumentation - return function (orig) { - return function (event, req, res) { - if (event === 'request') { - agent.logger.debug('intercepted request event call to %s.Server.prototype.emit for %s', moduleName, req.url) - - if (isRequestBlacklisted(agent, req)) { - agent.logger.debug('ignoring blacklisted request to %s', req.url) - // don't leak previous transaction - agent._instrumentation.currentTransaction = null - } else { - var traceparent = req.headers['elastic-apm-traceparent'] || req.headers.traceparent - var tracestate = req.headers.tracestate - var trans = agent.startTransaction(null, null, { - childOf: traceparent, - tracestate: tracestate - }) - trans.type = 'request' - trans.req = req - trans.res = res - - transactionForResponse.set(res, trans) - - ins.bindEmitter(req) - ins.bindEmitter(res) - - endOfStream(res, function (err) { - if (trans.ended) return - if (!err) return trans.end() - - if (agent._conf.errorOnAbortedRequests) { - var duration = trans._timer.elapsed() - if (duration > (agent._conf.abortedErrorThreshold * 1000)) { - agent.captureError('Socket closed with active HTTP request (>' + agent._conf.abortedErrorThreshold + ' sec)', { - request: req, - extra: { abortTime: duration } - }) - } - } - - // Handle case where res.end is called after an error occurred on the - // stream (e.g. if the underlying socket was prematurely closed) - const end = res.end - res.end = function () { - const result = end.apply(this, arguments) - trans.end() - return result - } - }) - } - } - - return orig.apply(this, arguments) - } - } -} - -function isRequestBlacklisted (agent, req) { - var i - - for (i = 0; i < agent._conf.ignoreUrlStr.length; i++) { - if (agent._conf.ignoreUrlStr[i] === req.url) return true - } - for (i = 0; i < agent._conf.ignoreUrlRegExp.length; i++) { - if (agent._conf.ignoreUrlRegExp[i].test(req.url)) return true - } - for (i = 0; i < agent._conf.transactionIgnoreUrlRegExp.length; i++) { - if (agent._conf.transactionIgnoreUrlRegExp[i].test(req.url)) return true - } - - var ua = req.headers['user-agent'] - if (!ua) return false - - for (i = 0; i < agent._conf.ignoreUserAgentStr.length; i++) { - if (ua.indexOf(agent._conf.ignoreUserAgentStr[i]) === 0) return true - } - for (i = 0; i < agent._conf.ignoreUserAgentRegExp.length; i++) { - if (agent._conf.ignoreUserAgentRegExp[i].test(ua)) return true - } - - return false -} - -function formatURL (item) { - return { - href: item.href, - pathname: item.pathname, - path: item.pathname + (item.search || ''), - protocol: item.protocol, - host: item.host, - port: item.port, - hostname: item.hostname, - hash: item.hash, - search: item.search - } -} - -// NOTE: This will also stringify and parse URL instances -// to a format which can be mixed into the options object. -function ensureUrl (v) { - if (typeof v === 'string') { - return formatURL(parseUrl(v)) - } else if (url.URL && v instanceof url.URL) { - return formatURL(v) - } else { - return v - } +// Creates a ClientRequest from options +// +// Creates and request an immediatly aborts/destroys it. +// This allows us to test with real ClientRequest objects +// and ensure their underlying properties are stable/consistant +// across versions. +// +// @param {options} options +// @return {ClientRequest} +function requestFromOptions (options) { + const req = http.request(options) + req.on('error', function () {}) + req.destroy() + return req } -function getSafeHost (res) { - return res.getHeader ? res.getHeader('Host') : res._headers.host +// Creates a ClientRequest from url and options +// +// Same as requestFromOptions, but uses optional URL argument +// +// @param {url} string +// @param {object} options +// @return {ClientRequest} + +function requestFromUrlAndOptions (url, options) { + const req = http.request(url, options) + req.on('error', function () {}) + req.destroy() + return req } -exports.traceOutgoingRequest = function (agent, moduleName, method) { - var ins = agent._instrumentation - - return function (orig) { - return function (...args) { - // TODO: See if we can delay the creation of span until the `response` - // event is fired, while still having it have the correct stack trace - var span = agent.startSpan(null, 'external', moduleName, 'http') - var id = span && span.transaction.id - - agent.logger.debug('intercepted call to %s.%s %o', moduleName, method, { id: id }) - - var options = {} - var newArgs = [options] - for (const arg of args) { - if (typeof arg === 'function') { - newArgs.push(arg) - } else { - Object.assign(options, ensureUrl(arg)) - } - } - - if (!options.headers) options.headers = {} - - // Attempt to use the span context as a traceparent header. - // If the transaction is unsampled the span will not exist, - // however a traceparent header must still be propagated - // to indicate requested services should not be sampled. - // Use the transaction context as the parent, in this case. - var parent = span || agent.currentTransaction - if (parent && parent._context && shouldPropagateTraceContext(options)) { - const headerValue = parent._context.toTraceParentString() - const traceStateValue = parent._context.toTraceStateString() - - options.headers.traceparent = headerValue - options.headers.tracestate = traceStateValue - if (agent._conf.useElasticTraceparentHeader) { - options.headers['elastic-apm-traceparent'] = headerValue - } - } - - var req = orig.apply(this, newArgs) - if (!span) return req - - if (getSafeHost(req) === agent._conf.serverHost) { - agent.logger.debug('ignore %s request to intake API %o', moduleName, { id: id }) - return req - } else { - var protocol = req.agent && req.agent.protocol - agent.logger.debug('request details: %o', { protocol: protocol, host: getSafeHost(req), id: id }) - } - - ins.bindEmitter(req) - - span.name = req.method + ' ' + getSafeHost(req) + parseUrl(req.path).pathname - - // TODO: Research if it's possible to add this to the prototype instead. - // Or if it's somehow preferable to listen for when a `response` listener - // is added instead of when `response` is emitted. - const emit = req.emit - req.emit = function (type, res) { - if (type === 'response') onresponse(res) - if (type === 'abort') onAbort(type) - return emit.apply(req, arguments) - } - - const url = getUrlFromRequestAndOptions(req, options) - if (!url) { - agent.logger.warn('unable to identify http.ClientRequest url %o', { id: id }) - } - let statusCode - return req - - // In case the request is ended prematurely - function onAbort (type) { - if (span.ended) return - agent.logger.debug('intercepted http.ClientRequest abort event %o', { id: id }) - - onEnd() - } - - function onEnd () { - span.setHttpContext({ - method: req.method, - status_code: statusCode, - url - }) - - // Add destination info only when socket conn is established - if (url) { - // The `getHTTPDestination` function might throw in case an - // invalid URL is given to the `URL()` function. Until we can - // be 100% sure this doesn't happen, we better catch it here. - // For details, see: - // https://github.com/elastic/apm-agent-nodejs/issues/1769 - try { - span.setDestinationContext(getHTTPDestination(url, span.type)) - } catch (e) { - agent.logger.error('Could not set destination context: %s', e.message) - } - } - - span._setOutcomeFromHttpStatusCode(statusCode) - span.end() - } - - function onresponse (res) { - // Work around async_hooks bug in Node.js 12.0 - 12.2 (https://github.com/nodejs/node/pull/27477) - ins._recoverTransaction(span.transaction) - - agent.logger.debug('intercepted http.ClientRequest response event %o', { id: id }) - ins.bindEmitter(res) - - statusCode = res.statusCode - - res.prependListener('end', function () { - agent.logger.debug('intercepted http.IncomingMessage end event %o', { id: id }) - - onEnd() - }) - } +tape('getUrlFromRequestAndOptions', function (suite) { + suite.test('host', function (t) { + const options = { + host: 'example.com' } - } -} + const req = requestFromOptions(options) -function shouldPropagateTraceContext (opts) { - return !isAWSSigned(opts) -} + const url = getUrlFromRequestAndOptions(req, options) + t.equals(url, 'http://example.com/', 'url rendered as expected') + t.end() + }) -function isAWSSigned (opts) { - const auth = opts.headers && (opts.headers.Authorization || opts.headers.authorization) - return typeof auth === 'string' ? auth.startsWith('AWS4-') : false -} + suite.test('host, path', function (t) { + const options = { + host: 'example.com', + path: '/foo' + } + const req = requestFromOptions(options) + + const url = getUrlFromRequestAndOptions(req, options) + t.equals(url, 'http://example.com/foo', 'url rendered as expected') + t.end() + }) + + suite.test('host, path, port, and query string', function (t) { + const options = { + host: 'example.com', + path: '/foo?fpp=bar', + port: 32 + } + const req = requestFromOptions(options) + + const url = getUrlFromRequestAndOptions(req, options) + t.equals(url, 'http://example.com:32/foo?fpp=bar', 'url rendered as expected') + t.end() + }) + + suite.test('host, path, port, query string, password', function (t) { + const options = { + host: 'username:password@example.com', + path: '/foo?fpp=bar', + port: 32 + } + const req = requestFromOptions(options) -// creates a sanitized URL suitable for the span's HTTP context -// -// This function recontructs a URL using the request object's -// properties where it can, and falling back to the user provided -// options where it can not. -// -// @param {req} ClientRequest -// @param {options} object -// @return string -function getUrlFromRequestAndOptions (req, options) { - if (!req) { - return false - } - options = options || {} - const port = options.port ? `:${options.port}` : '' - let url - try { - url = new URL(`${req.protocol}//${req.host}${port}${req.path}`) - } catch (e) { - return false - } - return `${url.protocol}//${url.host}${url.pathname}${url.search}` -} + const url = getUrlFromRequestAndOptions(req, options) + t.equals(url, 'http://example.com:32/foo?fpp=bar', 'url rendered as expected') + t.end() + }) -exports.getUrlFromRequestAndOptions = getUrlFromRequestAndOptions + suite.test('passes URL string to request but also options', function (t) { + const options = { + host: 'username:password@one.example.com' + } + const req = requestFromUrlAndOptions( + 'http://username2:password2@two.example.com/bar', + options + ) + const url = getUrlFromRequestAndOptions(req, options) + t.equals(url, 'http://two.example.com/bar', 'url rendered as expected') + t.end() + }) + + suite.test('exceptions caught', function (t) { + const options = { + host: 'username:password@one.example.com' + } + const req = requestFromUrlAndOptions( + 'http://username2:password2@two.example.com/bar', + options + ) + const url1 = getUrlFromRequestAndOptions(null, null) + const url2 = getUrlFromRequestAndOptions(req, null) + const url3 = getUrlFromRequestAndOptions(null, options) + const url4 = getUrlFromRequestAndOptions({}, options) + + t.equal(url1, false, 'no url returned') + t.equal(url2, 'http://two.example.com/bar', 'no url returned') + t.equal(url3, false, 'no url returned') + t.equal(url4, false, 'no url returned') + t.end() + }) + + suite.end() +}) From 9e28e29818c489006baab4c0d437440296527f98 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Mon, 12 Apr 2021 16:07:49 -0700 Subject: [PATCH 03/12] fix: all versions of node --- lib/instrumentation/http-shared.js | 20 +++++++-- .../modules/http/request-to-url.js | 42 ++++++------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index b8117ef3e7..30e23221ff 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -1,7 +1,7 @@ 'use strict' var url = require('url') - +const { URL } = require('url') var endOfStream = require('end-of-stream') var { parseUrl } = require('../parsers') @@ -261,8 +261,12 @@ function isAWSSigned (opts) { // creates a sanitized URL suitable for the span's HTTP context // // This function recontructs a URL using the request object's -// properties where it can, and falling back to the user provided -// options where it can not. +// properties where it can, and falling back to the options +// where it can not. +// +// NOTE: The options argument may not be the same options that are passed to +// http.request if the caller uses the the (url,options,...) method signature. +// The agent pre-normalizes the url and options into a single options array. // // @param {req} ClientRequest // @param {options} object @@ -272,13 +276,21 @@ function getUrlFromRequestAndOptions (req, options) { return false } options = options || {} + req = req || {} + req.agent = req.agent || {} + const port = options.port ? `:${options.port}` : '' + const host = req.host || options.hostname || options.host || 'unknown' + const protocol = req.protocol || req.agent.protocol + let url try { - url = new URL(`${req.protocol}//${req.host}${port}${req.path}`) + url = new URL(`${protocol}//${host}${port}${req.path}`) } catch (e) { + // console.log(e) return false } + return `${url.protocol}//${url.host}${url.pathname}${url.search}` } diff --git a/test/instrumentation/modules/http/request-to-url.js b/test/instrumentation/modules/http/request-to-url.js index 700db9cfcc..34f14744b8 100644 --- a/test/instrumentation/modules/http/request-to-url.js +++ b/test/instrumentation/modules/http/request-to-url.js @@ -18,21 +18,6 @@ function requestFromOptions (options) { return req } -// Creates a ClientRequest from url and options -// -// Same as requestFromOptions, but uses optional URL argument -// -// @param {url} string -// @param {object} options -// @return {ClientRequest} - -function requestFromUrlAndOptions (url, options) { - const req = http.request(url, options) - req.on('error', function () {}) - req.destroy() - return req -} - tape('getUrlFromRequestAndOptions', function (suite) { suite.test('host', function (t) { const options = { @@ -83,34 +68,33 @@ tape('getUrlFromRequestAndOptions', function (suite) { t.end() }) - suite.test('passes URL string to request but also options', function (t) { + suite.test('hostname beats host', function (t) { const options = { - host: 'username:password@one.example.com' + host: 'username:password@two.example.com', + hostname: 'username:password@one.example.com', + path: '/bar' } - const req = requestFromUrlAndOptions( - 'http://username2:password2@two.example.com/bar', - options - ) + const req = requestFromOptions(options) const url = getUrlFromRequestAndOptions(req, options) - t.equals(url, 'http://two.example.com/bar', 'url rendered as expected') + t.equals(url, 'http://one.example.com/bar', 'url rendered as expected') t.end() }) - suite.test('exceptions caught', function (t) { + suite.test('exceptions handled', function (t) { const options = { - host: 'username:password@one.example.com' + host: 'username:password@two.example.com', + hostname: 'username:password@one.example.com', + path: '/bar' } - const req = requestFromUrlAndOptions( - 'http://username2:password2@two.example.com/bar', - options - ) + const req = requestFromOptions(options) + const url1 = getUrlFromRequestAndOptions(null, null) const url2 = getUrlFromRequestAndOptions(req, null) const url3 = getUrlFromRequestAndOptions(null, options) const url4 = getUrlFromRequestAndOptions({}, options) t.equal(url1, false, 'no url returned') - t.equal(url2, 'http://two.example.com/bar', 'no url returned') + t.ok(url2, 'URL returned') t.equal(url3, false, 'no url returned') t.equal(url4, false, 'no url returned') t.end() From 4ef57893a8dc964d857313379f086817563f36a7 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Mon, 12 Apr 2021 16:12:44 -0700 Subject: [PATCH 04/12] fix: cleanup test names --- lib/instrumentation/http-shared.js | 2 +- .../modules/http/request-to-url.js | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index 30e23221ff..0ac83890c3 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -258,7 +258,7 @@ function isAWSSigned (opts) { return typeof auth === 'string' ? auth.startsWith('AWS4-') : false } -// creates a sanitized URL suitable for the span's HTTP context +// Creates a sanitized URL suitable for the span's HTTP context // // This function recontructs a URL using the request object's // properties where it can, and falling back to the options diff --git a/test/instrumentation/modules/http/request-to-url.js b/test/instrumentation/modules/http/request-to-url.js index 34f14744b8..4f8b34948d 100644 --- a/test/instrumentation/modules/http/request-to-url.js +++ b/test/instrumentation/modules/http/request-to-url.js @@ -18,8 +18,8 @@ function requestFromOptions (options) { return req } -tape('getUrlFromRequestAndOptions', function (suite) { - suite.test('host', function (t) { +tape('getUrlFromRequestAndOptions tests', function (suite) { + suite.test('options with host', function (t) { const options = { host: 'example.com' } @@ -30,7 +30,7 @@ tape('getUrlFromRequestAndOptions', function (suite) { t.end() }) - suite.test('host, path', function (t) { + suite.test('options with host and path', function (t) { const options = { host: 'example.com', path: '/foo' @@ -42,7 +42,7 @@ tape('getUrlFromRequestAndOptions', function (suite) { t.end() }) - suite.test('host, path, port, and query string', function (t) { + suite.test('options with host, path, port, and a query string', function (t) { const options = { host: 'example.com', path: '/foo?fpp=bar', @@ -55,7 +55,7 @@ tape('getUrlFromRequestAndOptions', function (suite) { t.end() }) - suite.test('host, path, port, query string, password', function (t) { + suite.test('options with host, path, port, query string, and a username/password', function (t) { const options = { host: 'username:password@example.com', path: '/foo?fpp=bar', @@ -68,7 +68,7 @@ tape('getUrlFromRequestAndOptions', function (suite) { t.end() }) - suite.test('hostname beats host', function (t) { + suite.test('options with host and hostname', function (t) { const options = { host: 'username:password@two.example.com', hostname: 'username:password@one.example.com', @@ -76,11 +76,11 @@ tape('getUrlFromRequestAndOptions', function (suite) { } const req = requestFromOptions(options) const url = getUrlFromRequestAndOptions(req, options) - t.equals(url, 'http://one.example.com/bar', 'url rendered as expected') + t.equals(url, 'http://one.example.com/bar', 'url rendered as expected (hostname wins)') t.end() }) - suite.test('exceptions handled', function (t) { + suite.test('does not crash with unexpected data', function (t) { const options = { host: 'username:password@two.example.com', hostname: 'username:password@one.example.com', From baf27fca7df630ad14e4480cb56d32a55c8e9bd8 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Mon, 12 Apr 2021 16:16:46 -0700 Subject: [PATCH 05/12] fix: update comments --- lib/instrumentation/http-shared.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index 0ac83890c3..1733d6edcd 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -266,7 +266,8 @@ function isAWSSigned (opts) { // // NOTE: The options argument may not be the same options that are passed to // http.request if the caller uses the the (url,options,...) method signature. -// The agent pre-normalizes the url and options into a single options array. +// The agent normalizes the url and options into a single options array. This +// function expects those pre-normalized options. // // @param {req} ClientRequest // @param {options} object From 19ad59f0b5f41683d576d7ead53f6a253f9a0169 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Mon, 12 Apr 2021 16:22:21 -0700 Subject: [PATCH 06/12] fix: update comments --- lib/instrumentation/http-shared.js | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index 1733d6edcd..8d154ac26f 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -260,17 +260,16 @@ function isAWSSigned (opts) { // Creates a sanitized URL suitable for the span's HTTP context // -// This function recontructs a URL using the request object's -// properties where it can, and falling back to the options -// where it can not. +// This function reconstructs a URL using the request object's properties +// where it can, and falling back to the options where it can not. // -// NOTE: The options argument may not be the same options that are passed to -// http.request if the caller uses the the (url,options,...) method signature. -// The agent normalizes the url and options into a single options array. This -// function expects those pre-normalized options. +// NOTE: The options argument may not be the same options that are passed +// to http.request if the caller uses the the http.request(url,options,...) +// method signature. The agent normalizes the url and options into a single +// options array. This function expects those pre-normalized options. // -// @param {req} ClientRequest -// @param {options} object +// @param {ClientRequest} req +// @param {object} options // @return string function getUrlFromRequestAndOptions (req, options) { if (!req) { From 9c4854ed2fecdb86d96d7832798564a050009b6d Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Mon, 12 Apr 2021 16:47:20 -0700 Subject: [PATCH 07/12] fix: node 8 and localhost --- lib/instrumentation/http-shared.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index 8d154ac26f..e2cc1d63db 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -280,9 +280,8 @@ function getUrlFromRequestAndOptions (req, options) { req.agent = req.agent || {} const port = options.port ? `:${options.port}` : '' - const host = req.host || options.hostname || options.host || 'unknown' + const host = req.host || options.hostname || options.host || 'localhost' const protocol = req.protocol || req.agent.protocol - let url try { url = new URL(`${protocol}//${host}${port}${req.path}`) From 9eb33ecc3f9c0a606be135344f6139d4ae6bd6fc Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Wed, 14 Apr 2021 15:12:36 -0700 Subject: [PATCH 08/12] fix: PR feedback --- lib/instrumentation/http-shared.js | 38 +++++++++++-------- .../modules/http/request-to-url.js | 24 +++++++----- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index e2cc1d63db..d064670451 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -1,7 +1,6 @@ 'use strict' var url = require('url') -const { URL } = require('url') var endOfStream = require('end-of-stream') var { parseUrl } = require('../parsers') @@ -190,7 +189,7 @@ exports.traceOutgoingRequest = function (agent, moduleName, method) { return emit.apply(req, arguments) } - const url = getUrlFromRequestAndOptions(req, options) + const url = getUrlFromRequestAndOptions(req, options, moduleName) if (!url) { agent.logger.warn('unable to identify http.ClientRequest url %o', { id: id }) } @@ -261,7 +260,13 @@ function isAWSSigned (opts) { // Creates a sanitized URL suitable for the span's HTTP context // // This function reconstructs a URL using the request object's properties -// where it can, and falling back to the options where it can not. +// where it can (node versions v14.5.0, v12.19.0 and later) , and falling +// back to the options where it can not. This function also strips any +// authentication information provided with the hostname. In other words +// +// http://username:password@example.com/foo +// +// becomes http://example.com/foo // // NOTE: The options argument may not be the same options that are passed // to http.request if the caller uses the the http.request(url,options,...) @@ -270,27 +275,28 @@ function isAWSSigned (opts) { // // @param {ClientRequest} req // @param {object} options -// @return string -function getUrlFromRequestAndOptions (req, options) { +// @param {string} fallbackProtocol +// @return string|undefined +function getUrlFromRequestAndOptions (req, options, fallbackProtocol) { if (!req) { - return false + return undefined } options = options || {} req = req || {} req.agent = req.agent || {} const port = options.port ? `:${options.port}` : '' + // req.host and req.protocol are node versions v14.5.0/v12.19.0 and later const host = req.host || options.hostname || options.host || 'localhost' - const protocol = req.protocol || req.agent.protocol - let url - try { - url = new URL(`${protocol}//${host}${port}${req.path}`) - } catch (e) { - // console.log(e) - return false - } - - return `${url.protocol}//${url.host}${url.pathname}${url.search}` + const protocol = req.protocol || req.agent.protocol || fallbackProtocol + // let url + // try { + // url = new URL(`${protocol}//${host}${port}${req.path}`) + // } catch (e) { + // return undefined + // } + + return `${protocol}//${host}${port}${req.path}` } exports.getUrlFromRequestAndOptions = getUrlFromRequestAndOptions diff --git a/test/instrumentation/modules/http/request-to-url.js b/test/instrumentation/modules/http/request-to-url.js index 4f8b34948d..73e90ff692 100644 --- a/test/instrumentation/modules/http/request-to-url.js +++ b/test/instrumentation/modules/http/request-to-url.js @@ -57,33 +57,39 @@ tape('getUrlFromRequestAndOptions tests', function (suite) { suite.test('options with host, path, port, query string, and a username/password', function (t) { const options = { - host: 'username:password@example.com', + host: 'example.com', path: '/foo?fpp=bar', + auth: 'username:password', port: 32 } const req = requestFromOptions(options) const url = getUrlFromRequestAndOptions(req, options) t.equals(url, 'http://example.com:32/foo?fpp=bar', 'url rendered as expected') + t.equals(url.indexOf('username'), -1, 'no auth information in url') + t.equals(url.indexOf('password'), -1, 'no auth information in url') t.end() }) suite.test('options with host and hostname', function (t) { const options = { - host: 'username:password@two.example.com', - hostname: 'username:password@one.example.com', - path: '/bar' + host: 'two.example.com', + hostname: 'one.example.com', + path: '/bar', + auth: 'username:password' } const req = requestFromOptions(options) const url = getUrlFromRequestAndOptions(req, options) t.equals(url, 'http://one.example.com/bar', 'url rendered as expected (hostname wins)') + t.equals(url.indexOf('username'), -1, 'no auth information in url') + t.equals(url.indexOf('password'), -1, 'no auth information in url') t.end() }) suite.test('does not crash with unexpected data', function (t) { const options = { - host: 'username:password@two.example.com', - hostname: 'username:password@one.example.com', + host: 'two.example.com', + hostname: 'one.example.com', path: '/bar' } const req = requestFromOptions(options) @@ -91,12 +97,10 @@ tape('getUrlFromRequestAndOptions tests', function (suite) { const url1 = getUrlFromRequestAndOptions(null, null) const url2 = getUrlFromRequestAndOptions(req, null) const url3 = getUrlFromRequestAndOptions(null, options) - const url4 = getUrlFromRequestAndOptions({}, options) - t.equal(url1, false, 'no url returned') + t.equal(url1, undefined, 'no url returned') t.ok(url2, 'URL returned') - t.equal(url3, false, 'no url returned') - t.equal(url4, false, 'no url returned') + t.equal(url3, undefined, 'no url returned') t.end() }) From d07108594b48fd340905d5d844f50667fd69f943 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Wed, 14 Apr 2021 15:28:41 -0700 Subject: [PATCH 09/12] test: new test for handling of the default port --- test/instrumentation/modules/http/request-to-url.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/instrumentation/modules/http/request-to-url.js b/test/instrumentation/modules/http/request-to-url.js index 73e90ff692..65ba6f545c 100644 --- a/test/instrumentation/modules/http/request-to-url.js +++ b/test/instrumentation/modules/http/request-to-url.js @@ -104,5 +104,16 @@ tape('getUrlFromRequestAndOptions tests', function (suite) { t.end() }) + suite.test('port 80 makes it through', function (t) { + const options = { + host: 'two.example.com', + port: 80 + } + const req = requestFromOptions(options) + + const url = getUrlFromRequestAndOptions(req, options) + t.equals(url, 'http://two.example.com:80/', 'port 80 made it thorugh') + t.end() + }) suite.end() }) From a28e0a4f4013f845ac88c37bc59f8bbc89b652f8 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Thu, 15 Apr 2021 11:52:09 -0700 Subject: [PATCH 10/12] docs: CHANGELOG --- CHANGELOG.asciidoc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 3eed4f2cda..bc379a4874 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -58,6 +58,9 @@ variable), then it should be removed. [float] ===== Bug fixes +* Fixed bug where the URL property for outgoing HTTP request spans was set + with the server's IP address rather than its hostname. The Agent now sets + this property with the actual URL requested by Node.js. [[release-notes-3.13.0]] ==== 3.13.0 - 2021/04/06 From 0fcb0a7b49aad0d397c87de3622a8dcf8bee8be0 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Thu, 15 Apr 2021 15:35:13 -0700 Subject: [PATCH 11/12] fix: comment cleanup, test case for agent: addRequest(){} --- lib/instrumentation/http-shared.js | 8 +------- .../modules/http/request-to-url.js | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index d064670451..8b0e680fb4 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -189,7 +189,7 @@ exports.traceOutgoingRequest = function (agent, moduleName, method) { return emit.apply(req, arguments) } - const url = getUrlFromRequestAndOptions(req, options, moduleName) + const url = getUrlFromRequestAndOptions(req, options, moduleName+':') if (!url) { agent.logger.warn('unable to identify http.ClientRequest url %o', { id: id }) } @@ -289,12 +289,6 @@ function getUrlFromRequestAndOptions (req, options, fallbackProtocol) { // req.host and req.protocol are node versions v14.5.0/v12.19.0 and later const host = req.host || options.hostname || options.host || 'localhost' const protocol = req.protocol || req.agent.protocol || fallbackProtocol - // let url - // try { - // url = new URL(`${protocol}//${host}${port}${req.path}`) - // } catch (e) { - // return undefined - // } return `${protocol}//${host}${port}${req.path}` } diff --git a/test/instrumentation/modules/http/request-to-url.js b/test/instrumentation/modules/http/request-to-url.js index 65ba6f545c..7435755d36 100644 --- a/test/instrumentation/modules/http/request-to-url.js +++ b/test/instrumentation/modules/http/request-to-url.js @@ -115,5 +115,22 @@ tape('getUrlFromRequestAndOptions tests', function (suite) { t.equals(url, 'http://two.example.com:80/', 'port 80 made it thorugh') t.end() }) + + suite.test('missing protocol', function (t) { + const options = { + hostname: 'localhost', + path: '/get', + // A custom agent that implements the minimum to pass muster, but does + // *not* define `agent.protocol`. + agent: { + addRequest() {} + } + } + const req = requestFromOptions(options) + + const url = getUrlFromRequestAndOptions(req, options,'http:') + t.equals(url, 'http://localhost/get', 'protocol falls back correctly') + t.end() + }) suite.end() }) From c3ed44869cf24bb7fb9bbe12455d3c1a07ee7521 Mon Sep 17 00:00:00 2001 From: Alan Storm Date: Thu, 15 Apr 2021 16:13:38 -0700 Subject: [PATCH 12/12] fix: lint --- lib/instrumentation/http-shared.js | 2 +- test/instrumentation/modules/http/request-to-url.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/instrumentation/http-shared.js b/lib/instrumentation/http-shared.js index 8b0e680fb4..8d1923e1ae 100644 --- a/lib/instrumentation/http-shared.js +++ b/lib/instrumentation/http-shared.js @@ -189,7 +189,7 @@ exports.traceOutgoingRequest = function (agent, moduleName, method) { return emit.apply(req, arguments) } - const url = getUrlFromRequestAndOptions(req, options, moduleName+':') + const url = getUrlFromRequestAndOptions(req, options, moduleName + ':') if (!url) { agent.logger.warn('unable to identify http.ClientRequest url %o', { id: id }) } diff --git a/test/instrumentation/modules/http/request-to-url.js b/test/instrumentation/modules/http/request-to-url.js index 7435755d36..422e16d720 100644 --- a/test/instrumentation/modules/http/request-to-url.js +++ b/test/instrumentation/modules/http/request-to-url.js @@ -123,12 +123,12 @@ tape('getUrlFromRequestAndOptions tests', function (suite) { // A custom agent that implements the minimum to pass muster, but does // *not* define `agent.protocol`. agent: { - addRequest() {} + addRequest () {} } } const req = requestFromOptions(options) - const url = getUrlFromRequestAndOptions(req, options,'http:') + const url = getUrlFromRequestAndOptions(req, options, 'http:') t.equals(url, 'http://localhost/get', 'protocol falls back correctly') t.end() })