Skip to content

Commit

Permalink
feat(sdk-crash-detection): Add event stripper
Browse files Browse the repository at this point in the history
Add class event stripper, which only keeps event data relevant for
the SDK crash.
  • Loading branch information
philipphofmann committed Jun 15, 2023
1 parent 8c2a6df commit 95086b4
Show file tree
Hide file tree
Showing 3 changed files with 320 additions and 1 deletion.
132 changes: 132 additions & 0 deletions src/sentry/utils/sdk_crashes/event_stripper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
from enum import Enum, auto
from typing import Any, Dict, Mapping, Optional, Sequence

from sentry.db.models import NodeData
from sentry.utils.safe import get_path
from sentry.utils.sdk_crashes.sdk_crash_detector import SDKCrashDetector


class Allow(Enum):
def __init__(self, explanation: str = "") -> None:
self.explanation = explanation

"""Keeps the event data if it is of type str, int, float, bool."""
SIMPLE_TYPE = auto()

"""Keeps the event data no matter the type."""
ALL = auto()

"""
Doesn't keep the event data no matter the type. This can be used to explicitly
specify that data should be removed with an explanation.
"""
NEVER = auto()

def with_explanation(self, explanation: str) -> "Allow":
self.explanation = explanation
return self


EVENT_DATA_ALLOWLIST = {
"type": Allow.SIMPLE_TYPE,
"datetime": Allow.SIMPLE_TYPE,
"timestamp": Allow.SIMPLE_TYPE,
"platform": Allow.SIMPLE_TYPE,
"sdk": {
"name": Allow.SIMPLE_TYPE,
"version": Allow.SIMPLE_TYPE,
"integrations": Allow.NEVER.with_explanation("Users can add their own integrations."),
},
"exception": Allow.ALL.with_explanation("We strip the exception data separately."),
"debug_meta": Allow.ALL,
"contexts": {
"device": {
"family": Allow.SIMPLE_TYPE,
"model": Allow.SIMPLE_TYPE,
"arch": Allow.SIMPLE_TYPE,
},
"os": {
"name": Allow.SIMPLE_TYPE,
"version": Allow.SIMPLE_TYPE,
"build": Allow.SIMPLE_TYPE,
},
},
}


def strip_event_data(
event_data: NodeData, sdk_crash_detector: SDKCrashDetector
) -> Mapping[str, Any]:
new_event_data = _strip_event_data_with_allowlist(event_data, EVENT_DATA_ALLOWLIST)

if (new_event_data is None) or (new_event_data == {}):
return {}

stripped_frames: Sequence[Mapping[str, Any]] = []
frames = get_path(new_event_data, "exception", "values", -1, "stacktrace", "frames")

if frames is not None:
stripped_frames = _strip_frames(frames, sdk_crash_detector)
new_event_data["exception"]["values"][0]["stacktrace"]["frames"] = stripped_frames

debug_meta_images = get_path(new_event_data, "debug_meta", "images")
if debug_meta_images is not None:
stripped_debug_meta_images = _strip_debug_meta(debug_meta_images, stripped_frames)
new_event_data["debug_meta"]["images"] = stripped_debug_meta_images

return new_event_data


def _strip_event_data_with_allowlist(
data: Mapping[str, Any], allowlist: Optional[Mapping[str, Any]]
) -> Optional[Mapping[str, Any]]:
"""
Recursively traverses the data and only keeps values based on the allowlist.
"""
if allowlist is None:
return None

stripped_data: Dict[str, Any] = {}
for data_key, data_value in data.items():
allowlist_for_data = allowlist.get(data_key)
if allowlist_for_data is None:
continue

if isinstance(allowlist_for_data, Allow):
allowed = allowlist_for_data

if allowed is Allow.SIMPLE_TYPE and isinstance(data_value, (str, int, float, bool)):
stripped_data[data_key] = data_value
elif allowed is Allow.ALL:
stripped_data[data_key] = data_value
else:
continue

elif isinstance(data_value, Mapping):
stripped_data[data_key] = _strip_event_data_with_allowlist(
data_value, allowlist_for_data
)

return stripped_data


def _strip_debug_meta(
debug_meta_images: Sequence[Mapping[str, Any]], frames: Sequence[Mapping[str, Any]]
) -> Sequence[Mapping[str, Any]]:

frame_image_addresses = {frame["image_addr"] for frame in frames}

return [image for image in debug_meta_images if image["image_addr"] in frame_image_addresses]


def _strip_frames(
frames: Sequence[Mapping[str, Any]], sdk_crash_detector: SDKCrashDetector
) -> Sequence[Mapping[str, Any]]:
"""
Only keep SDK frames or non in app frames.
"""
return [
frame
for frame in frames
if sdk_crash_detector.is_sdk_frame(frame) or frame.get("in_app", None) is False
]
3 changes: 2 additions & 1 deletion src/sentry/utils/sdk_crashes/sdk_crash_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sentry.issues.grouptype import GroupCategory
from sentry.utils.safe import get_path, set_path
from sentry.utils.sdk_crashes.cocoa_sdk_crash_detector import CocoaSDKCrashDetector
from sentry.utils.sdk_crashes.event_stripper import strip_event_data
from sentry.utils.sdk_crashes.sdk_crash_detector import SDKCrashDetector


Expand Down Expand Up @@ -54,7 +55,7 @@ def detect_sdk_crash(self, event: Event, event_project_id: int) -> Optional[Even

if self.cocoa_sdk_crash_detector.is_sdk_crash(frames):
# We still need to strip event data for to avoid collecting PII. We will do this in a separate PR.
sdk_crash_event_data = event.data
sdk_crash_event_data = strip_event_data(event.data, self.cocoa_sdk_crash_detector)

set_path(
sdk_crash_event_data, "contexts", "sdk_crash_detection", value={"detected": True}
Expand Down
186 changes: 186 additions & 0 deletions tests/sentry/utils/sdk_crashes/test_event_stripper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import abc
from unittest.mock import Mock

from fixtures.sdk_crash_detection.crash_event import (
IN_APP_FRAME,
get_crash_event,
get_crash_event_with_frames,
get_frames,
)
from sentry.testutils import TestCase
from sentry.testutils.cases import BaseTestCase
from sentry.testutils.silo import region_silo_test
from sentry.utils.safe import get_path
from sentry.utils.sdk_crashes.cocoa_sdk_crash_detector import CocoaSDKCrashDetector
from sentry.utils.sdk_crashes.event_stripper import strip_event_data


class BaseEventStripperMixin(BaseTestCase, metaclass=abc.ABCMeta):
@abc.abstractmethod
def create_event(self, data, project_id, assert_no_errors=True):
pass

def execute_test(self):
pass


class EventStripperTestMixin(BaseEventStripperMixin):
def test_strip_event_data_keeps_allowed_keys(self):
event = self.create_event(
data=get_crash_event(),
project_id=self.project.id,
)

stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector())

keys_removed = {"tags", "user", "threads", "breadcrumbs", "environment"}
for key in keys_removed:
assert stripped_event_data.get(key) is None, f"key {key} should be removed"

keys_kept = {
"type",
"timestamp",
"platform",
"sdk",
"exception",
"debug_meta",
"contexts",
}

for key in keys_kept:
assert stripped_event_data.get(key) is not None, f"key {key} should be kept"

def test_strip_event_data_without_debug_meta(self):
event_data = get_crash_event()
debug_meta = dict(get_path(event_data, "debug_meta"))
debug_meta.pop("images")
event_data["debug_meta"] = debug_meta

event = self.create_event(
data=event_data,
project_id=self.project.id,
)

stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector())

debug_meta_images = get_path(stripped_event_data, "debug_meta", "images")
assert debug_meta_images is None

def test_strip_event_data_strips_context(self):
event = self.create_event(
data=get_crash_event(),
project_id=self.project.id,
)

stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector())

assert stripped_event_data.get("contexts") == {
"os": {
"name": "iOS",
"version": "16.3",
"build": "20D47",
},
"device": {
"family": "iOS",
"model": "iPhone14,8",
"arch": "arm64e",
},
}

def test_strip_event_data_strips_sdk(self):
event = self.create_event(
data=get_crash_event(),
project_id=self.project.id,
)

stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector())

assert stripped_event_data.get("sdk") == {
"name": "sentry.cocoa",
"version": "8.1.0",
}

def test_strip_event_data_strips_value_if_not_simple_type(self):
event = self.create_event(
data=get_crash_event(),
project_id=self.project.id,
)
event.data["type"] = {"foo": "bar"}

stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector())

assert stripped_event_data.get("type") is None

def test_strip_event_data_keeps_simple_types(self):
event = self.create_event(
data=get_crash_event(),
project_id=self.project.id,
)
event.data["type"] = True
event.data["datetime"] = 0.1
event.data["timestamp"] = 1
event.data["platform"] = "cocoa"

stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector())

assert stripped_event_data.get("type") is True
assert stripped_event_data.get("datetime") == 0.1
assert stripped_event_data.get("timestamp") == 1
assert stripped_event_data.get("platform") == "cocoa"

def test_strip_event_data_strips_non_referenced_dsyms(self):
event = self.create_event(
data=get_crash_event(),
project_id=self.project.id,
)

stripped_event_data = strip_event_data(event.data, Mock())

debug_meta_images = get_path(stripped_event_data, "debug_meta", "images")

image_addresses = set(map(lambda image: image["image_addr"], debug_meta_images))
expected_image_addresses = {"0x1a4e8f000", "0x100304000", "0x100260000"}
assert image_addresses == expected_image_addresses

def test_strip_frames_sentry_in_app_frame_kept(self):
frames = get_frames("SentryCrashMonitor_CPPException.cpp", sentry_frame_in_app=True)
self._execute_strip_frames_test(frames)

def test_strip_frames_sentry_non_in_app_frame_kept(self):
frames = get_frames("SentryCrashMonitor_CPPException.cpp", sentry_frame_in_app=False)
self._execute_strip_frames_test(frames)

def _execute_strip_frames_test(self, frames):
event_data = get_crash_event_with_frames(frames)

event = self.create_event(
data=event_data,
project_id=self.project.id,
)

stripped_event_data = strip_event_data(event.data, CocoaSDKCrashDetector())

stripped_frames = get_path(
stripped_event_data, "exception", "values", -1, "stacktrace", "frames"
)

assert len(stripped_frames) == 6
assert (
len(
[
frame
for frame in stripped_frames
if frame["function"] == IN_APP_FRAME["function"]
]
)
== 0
), "in_app frame should be removed"


@region_silo_test
class EventStripperTest(
TestCase,
EventStripperTestMixin,
):
def create_event(self, data, project_id, assert_no_errors=True):
return self.store_event(data=data, project_id=project_id, assert_no_errors=assert_no_errors)

0 comments on commit 95086b4

Please sign in to comment.