From 95086b406dec79e6bcef45f299a3e92f727da2c0 Mon Sep 17 00:00:00 2001 From: Philipp Hofmann Date: Thu, 15 Jun 2023 09:22:41 +0200 Subject: [PATCH] feat(sdk-crash-detection): Add event stripper Add class event stripper, which only keeps event data relevant for the SDK crash. --- .../utils/sdk_crashes/event_stripper.py | 132 +++++++++++++ .../utils/sdk_crashes/sdk_crash_detection.py | 3 +- .../utils/sdk_crashes/test_event_stripper.py | 186 ++++++++++++++++++ 3 files changed, 320 insertions(+), 1 deletion(-) create mode 100644 src/sentry/utils/sdk_crashes/event_stripper.py create mode 100644 tests/sentry/utils/sdk_crashes/test_event_stripper.py diff --git a/src/sentry/utils/sdk_crashes/event_stripper.py b/src/sentry/utils/sdk_crashes/event_stripper.py new file mode 100644 index 00000000000000..741aa916ba6d15 --- /dev/null +++ b/src/sentry/utils/sdk_crashes/event_stripper.py @@ -0,0 +1,132 @@ +from enum import Enum, auto +from typing import Any, Dict, Mapping, Optional, Sequence + +from sentry.db.models import NodeData +from sentry.utils.safe import get_path +from sentry.utils.sdk_crashes.sdk_crash_detector import SDKCrashDetector + + +class Allow(Enum): + def __init__(self, explanation: str = "") -> None: + self.explanation = explanation + + """Keeps the event data if it is of type str, int, float, bool.""" + SIMPLE_TYPE = auto() + + """Keeps the event data no matter the type.""" + ALL = auto() + + """ + Doesn't keep the event data no matter the type. This can be used to explicitly + specify that data should be removed with an explanation. + """ + NEVER = auto() + + def with_explanation(self, explanation: str) -> "Allow": + self.explanation = explanation + return self + + +EVENT_DATA_ALLOWLIST = { + "type": Allow.SIMPLE_TYPE, + "datetime": Allow.SIMPLE_TYPE, + "timestamp": Allow.SIMPLE_TYPE, + "platform": Allow.SIMPLE_TYPE, + "sdk": { + "name": Allow.SIMPLE_TYPE, + "version": Allow.SIMPLE_TYPE, + "integrations": Allow.NEVER.with_explanation("Users can add their own integrations."), + }, + "exception": Allow.ALL.with_explanation("We strip the exception data separately."), + "debug_meta": Allow.ALL, + "contexts": { + "device": { + "family": Allow.SIMPLE_TYPE, + "model": Allow.SIMPLE_TYPE, + "arch": Allow.SIMPLE_TYPE, + }, + "os": { + "name": Allow.SIMPLE_TYPE, + "version": Allow.SIMPLE_TYPE, + "build": Allow.SIMPLE_TYPE, + }, + }, +} + + +def strip_event_data( + event_data: NodeData, sdk_crash_detector: SDKCrashDetector +) -> Mapping[str, Any]: + new_event_data = _strip_event_data_with_allowlist(event_data, EVENT_DATA_ALLOWLIST) + + if (new_event_data is None) or (new_event_data == {}): + return {} + + stripped_frames: Sequence[Mapping[str, Any]] = [] + frames = get_path(new_event_data, "exception", "values", -1, "stacktrace", "frames") + + if frames is not None: + stripped_frames = _strip_frames(frames, sdk_crash_detector) + new_event_data["exception"]["values"][0]["stacktrace"]["frames"] = stripped_frames + + debug_meta_images = get_path(new_event_data, "debug_meta", "images") + if debug_meta_images is not None: + stripped_debug_meta_images = _strip_debug_meta(debug_meta_images, stripped_frames) + new_event_data["debug_meta"]["images"] = stripped_debug_meta_images + + return new_event_data + + +def _strip_event_data_with_allowlist( + data: Mapping[str, Any], allowlist: Optional[Mapping[str, Any]] +) -> Optional[Mapping[str, Any]]: + """ + Recursively traverses the data and only keeps values based on the allowlist. + """ + if allowlist is None: + return None + + stripped_data: Dict[str, Any] = {} + for data_key, data_value in data.items(): + allowlist_for_data = allowlist.get(data_key) + if allowlist_for_data is None: + continue + + if isinstance(allowlist_for_data, Allow): + allowed = allowlist_for_data + + if allowed is Allow.SIMPLE_TYPE and isinstance(data_value, (str, int, float, bool)): + stripped_data[data_key] = data_value + elif allowed is Allow.ALL: + stripped_data[data_key] = data_value + else: + continue + + elif isinstance(data_value, Mapping): + stripped_data[data_key] = _strip_event_data_with_allowlist( + data_value, allowlist_for_data + ) + + return stripped_data + + +def _strip_debug_meta( + debug_meta_images: Sequence[Mapping[str, Any]], frames: Sequence[Mapping[str, Any]] +) -> Sequence[Mapping[str, Any]]: + + frame_image_addresses = {frame["image_addr"] for frame in frames} + + return [image for image in debug_meta_images if image["image_addr"] in frame_image_addresses] + + +def _strip_frames( + frames: Sequence[Mapping[str, Any]], sdk_crash_detector: SDKCrashDetector +) -> Sequence[Mapping[str, Any]]: + """ + Only keep SDK frames or non in app frames. + """ + return [ + frame + for frame in frames + if sdk_crash_detector.is_sdk_frame(frame) or frame.get("in_app", None) is False + ] diff --git a/src/sentry/utils/sdk_crashes/sdk_crash_detection.py b/src/sentry/utils/sdk_crashes/sdk_crash_detection.py index ab349a43b659b5..edf1207ad3e428 100644 --- a/src/sentry/utils/sdk_crashes/sdk_crash_detection.py +++ b/src/sentry/utils/sdk_crashes/sdk_crash_detection.py @@ -6,6 +6,7 @@ from sentry.issues.grouptype import GroupCategory from sentry.utils.safe import get_path, set_path from sentry.utils.sdk_crashes.cocoa_sdk_crash_detector import CocoaSDKCrashDetector +from sentry.utils.sdk_crashes.event_stripper import strip_event_data from sentry.utils.sdk_crashes.sdk_crash_detector import SDKCrashDetector @@ -54,7 +55,7 @@ def detect_sdk_crash(self, event: Event, event_project_id: int) -> Optional[Even if self.cocoa_sdk_crash_detector.is_sdk_crash(frames): # We still need to strip event data for to avoid collecting PII. We will do this in a separate PR. - sdk_crash_event_data = event.data + sdk_crash_event_data = strip_event_data(event.data, self.cocoa_sdk_crash_detector) set_path( sdk_crash_event_data, "contexts", "sdk_crash_detection", value={"detected": True} diff --git a/tests/sentry/utils/sdk_crashes/test_event_stripper.py b/tests/sentry/utils/sdk_crashes/test_event_stripper.py new file mode 100644 index 00000000000000..06bc5423e6f896 --- /dev/null +++ b/tests/sentry/utils/sdk_crashes/test_event_stripper.py @@ -0,0 +1,186 @@ +import abc +from unittest.mock import Mock + +from fixtures.sdk_crash_detection.crash_event import ( + IN_APP_FRAME, + get_crash_event, + get_crash_event_with_frames, + get_frames, +) +from sentry.testutils import TestCase +from sentry.testutils.cases import BaseTestCase +from sentry.testutils.silo import region_silo_test +from sentry.utils.safe import get_path +from sentry.utils.sdk_crashes.cocoa_sdk_crash_detector import CocoaSDKCrashDetector +from sentry.utils.sdk_crashes.event_stripper import strip_event_data + + +class BaseEventStripperMixin(BaseTestCase, metaclass=abc.ABCMeta): + @abc.abstractmethod + def create_event(self, data, project_id, assert_no_errors=True): + pass + + def execute_test(self): + pass + + +class EventStripperTestMixin(BaseEventStripperMixin): + def test_strip_event_data_keeps_allowed_keys(self): + event = self.create_event( + data=get_crash_event(), + project_id=self.project.id, + ) + + stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector()) + + keys_removed = {"tags", "user", "threads", "breadcrumbs", "environment"} + for key in keys_removed: + assert stripped_event_data.get(key) is None, f"key {key} should be removed" + + keys_kept = { + "type", + "timestamp", + "platform", + "sdk", + "exception", + "debug_meta", + "contexts", + } + + for key in keys_kept: + assert stripped_event_data.get(key) is not None, f"key {key} should be kept" + + def test_strip_event_data_without_debug_meta(self): + event_data = get_crash_event() + debug_meta = dict(get_path(event_data, "debug_meta")) + debug_meta.pop("images") + event_data["debug_meta"] = debug_meta + + event = self.create_event( + data=event_data, + project_id=self.project.id, + ) + + stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector()) + + debug_meta_images = get_path(stripped_event_data, "debug_meta", "images") + assert debug_meta_images is None + + def test_strip_event_data_strips_context(self): + event = self.create_event( + data=get_crash_event(), + project_id=self.project.id, + ) + + stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector()) + + assert stripped_event_data.get("contexts") == { + "os": { + "name": "iOS", + "version": "16.3", + "build": "20D47", + }, + "device": { + "family": "iOS", + "model": "iPhone14,8", + "arch": "arm64e", + }, + } + + def test_strip_event_data_strips_sdk(self): + event = self.create_event( + data=get_crash_event(), + project_id=self.project.id, + ) + + stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector()) + + assert stripped_event_data.get("sdk") == { + "name": "sentry.cocoa", + "version": "8.1.0", + } + + def test_strip_event_data_strips_value_if_not_simple_type(self): + event = self.create_event( + data=get_crash_event(), + project_id=self.project.id, + ) + event.data["type"] = {"foo": "bar"} + + stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector()) + + assert stripped_event_data.get("type") is None + + def test_strip_event_data_keeps_simple_types(self): + event = self.create_event( + data=get_crash_event(), + project_id=self.project.id, + ) + event.data["type"] = True + event.data["datetime"] = 0.1 + event.data["timestamp"] = 1 + event.data["platform"] = "cocoa" + + stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector()) + + assert stripped_event_data.get("type") is True + assert stripped_event_data.get("datetime") == 0.1 + assert stripped_event_data.get("timestamp") == 1 + assert stripped_event_data.get("platform") == "cocoa" + + def test_strip_event_data_strips_non_referenced_dsyms(self): + event = self.create_event( + data=get_crash_event(), + project_id=self.project.id, + ) + + stripped_event_data = strip_event_data(event.data, Mock()) + + debug_meta_images = get_path(stripped_event_data, "debug_meta", "images") + + image_addresses = set(map(lambda image: image["image_addr"], debug_meta_images)) + expected_image_addresses = {"0x1a4e8f000", "0x100304000", "0x100260000"} + assert image_addresses == expected_image_addresses + + def test_strip_frames_sentry_in_app_frame_kept(self): + frames = get_frames("SentryCrashMonitor_CPPException.cpp", sentry_frame_in_app=True) + self._execute_strip_frames_test(frames) + + def test_strip_frames_sentry_non_in_app_frame_kept(self): + frames = get_frames("SentryCrashMonitor_CPPException.cpp", sentry_frame_in_app=False) + self._execute_strip_frames_test(frames) + + def _execute_strip_frames_test(self, frames): + event_data = get_crash_event_with_frames(frames) + + event = self.create_event( + data=event_data, + project_id=self.project.id, + ) + + stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector()) + + stripped_frames = get_path( + stripped_event_data, "exception", "values", -1, "stacktrace", "frames" + ) + + assert len(stripped_frames) == 6 + assert ( + len( + [ + frame + for frame in stripped_frames + if frame["function"] == IN_APP_FRAME["function"] + ] + ) + == 0 + ), "in_app frame should be removed" + + +@region_silo_test +class EventStripperTest( + TestCase, + EventStripperTestMixin, +): + def create_event(self, data, project_id, assert_no_errors=True): + return self.store_event(data=data, project_id=project_id, assert_no_errors=assert_no_errors)