From 2540fd9b52769d4c3d01f41db06675710a482653 Mon Sep 17 00:00:00 2001 From: Trent Smith <1429913+Bento007@users.noreply.github.com> Date: Fri, 12 May 2023 09:30:52 -0700 Subject: [PATCH] feat: Add correlation ids to logs to improve debugging (#606) - Add a unique request_id to all messages logged per request. - Return a unique X-Request-ID that correlated with log messages produce during a request. - Add JSON log formatter to format all logs into JSON. --- server/app/app.py | 8 +++++-- server/app/logging.py | 25 ++++++++++++++++++++ server/app/request_id.py | 24 +++++++++++++++++++ server/requirements.txt | 3 ++- server/tests/unit/common/apis/test_api_v3.py | 8 +++++++ 5 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 server/app/logging.py create mode 100644 server/app/request_id.py diff --git a/server/app/app.py b/server/app/app.py index 1da8be8f2..4f5a87dac 100644 --- a/server/app/app.py +++ b/server/app/app.py @@ -6,13 +6,15 @@ from http import HTTPStatus from urllib.parse import urlparse -from flask import Blueprint, Flask, Response, abort, current_app, make_response, redirect, render_template, request +from flask import Blueprint, Flask, Response, abort, current_app, g, make_response, redirect, render_template, request from flask_restful import Api, Resource from server_timing import Timing as ServerTiming import server.common.rest as common_rest from server.app.api.util import get_data_adaptor, get_dataset_artifact_s3_uri from server.app.api.v3 import register_api_v3 +from server.app.logging import configure_logging +from server.app.request_id import generate_request_id, get_request_id from server.common.errors import ( DatasetAccessError, DatasetNotFoundError, @@ -25,7 +27,7 @@ from server.common.utils.utils import Float32JSONEncoder, path_join from server.dataset.matrix_loader import DataLoader -logging.basicConfig(level=logging.INFO) +configure_logging() @webbp.errorhandler(RequestException) @@ -192,6 +194,7 @@ def __init__(self, app_config): @self.app.before_request def pre_request_logging(): + g.request_id = generate_request_id() message = json.dumps(dict(url=request.path, method=request.method, schema=request.scheme)) self.app.logger.info(message) @@ -206,5 +209,6 @@ def post_request_logging(response: Response): schema=request.scheme, ) ) + response.headers["X-Request-Id"] = get_request_id() self.app.logger.info(message) return response diff --git a/server/app/logging.py b/server/app/logging.py new file mode 100644 index 000000000..947e3856f --- /dev/null +++ b/server/app/logging.py @@ -0,0 +1,25 @@ +import logging +from typing import List + +from pythonjsonlogger import jsonlogger + +from server.app.request_id import RequestIdFilter + + +def format_log_string(fields: List[str]) -> str: + return " ".join([f"%({field})" for field in fields]) + + +LOGGED_FIELDS = ["levelname", "asctime", "name", "message", "lineno", "pathname", "request_id"] +LOG_FORMAT = format_log_string(LOGGED_FIELDS) + + +DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%03dZ" + + +def configure_logging(): + logHandler = logging.StreamHandler() + formatter = jsonlogger.JsonFormatter(fmt=LOG_FORMAT, datefmt=DATETIME_FORMAT) + logHandler.setFormatter(formatter) + logHandler.addFilter(RequestIdFilter()) + logging.basicConfig(level=logging.INFO, handlers=[logHandler], force=True) diff --git a/server/app/request_id.py b/server/app/request_id.py new file mode 100644 index 000000000..40fb321ce --- /dev/null +++ b/server/app/request_id.py @@ -0,0 +1,24 @@ +""" +Inspired by: http://blog.mcpolemic.com/2016/01/18/adding-request-ids-to-flask.html +""" +import logging +import uuid + +import flask + + +def generate_request_id(): + return uuid.uuid4() + + +def get_request_id(): + return getattr(flask.g, "request_id", None) + + +class RequestIdFilter(logging.Filter): + # This is a logging filter that makes the request ID available for use in + # the logging format. Note that we're checking if we're in a request + # context, as we may want to log things before Flask is fully loaded. + def filter(self, record): + record.request_id = flask.g.request_id if flask.has_request_context() else "" + return True diff --git a/server/requirements.txt b/server/requirements.txt index 25200adb8..25c4d9d01 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -21,4 +21,5 @@ scipy==1.10.1 flask-server-timing==0.1.2 s3fs==0.4.2 tiledb==0.21.1 # Explorer's major/minor tiledb version should always be the >= Portal's tiledb major/minor version (for read/write compatibility) -Werkzeug==2.2.3 # 2.3.0 breaks the binary endpoints \ No newline at end of file +Werkzeug==2.2.3 # 2.3.0 breaks the binary endpoints +python-json-logger==2.0.7 diff --git a/server/tests/unit/common/apis/test_api_v3.py b/server/tests/unit/common/apis/test_api_v3.py index 94c01b5b9..62400bdab 100644 --- a/server/tests/unit/common/apis/test_api_v3.py +++ b/server/tests/unit/common/apis/test_api_v3.py @@ -67,6 +67,14 @@ def setUpClass(cls, app_config=None): except requests.exceptions.ConnectionError: time.sleep(1) + def test_request_id(self): + endpoint = "schema" + url_base = self.TEST_URL_BASE + url = f"{url_base}{endpoint}" + result = self.client.get(url) + self.assertEqual(result.status_code, HTTPStatus.OK) + self.assertIn("X-Request-ID", result.headers) + def test_initialize(self): endpoint = "schema" for url_base in [self.TEST_URL_BASE, self.TEST_URL_BASE_SPARSE]: