diff --git a/README.md b/README.md
index df46af9..56284fe 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Below is how to install in a plain python 3.11+ environment
 
 ``` sh
-https://github.com/wagov/nbdev-squ/releases/download/v1.3.2/nbdev_squ-1.3.2-py3-none-any.whl
+pip install nbdev-squ
 ```
 
 The installation can also be run in a notebook (we tend to use
@@ -18,7 +18,7 @@ should load the json secret *squconfig-`my_keyvault_tenantid`* from the
 `my_keyvault_name` keyvault.
 
 ``` python
-%pip install https://github.com/wagov/nbdev-squ/releases/download/v1.3.2/nbdev_squ-1.3.2-py3-none-any.whl
+%pip install nbdev-squ
 import os; os.environ["SQU_CONFIG"] = "{{ my_keyvault_name }}/{{ my_keyvault_tenantid }}"
 
 from nbdev_squ import api
diff --git a/install.py b/install.py
index 98325cc..dadaa91 100755
--- a/install.py
+++ b/install.py
@@ -12,15 +12,5 @@
 run(["npm", "run", "build"])
 run(["nbdev_clean"])
 run(["nbdev_export"])
-
-config = configparser.ConfigParser()
-config.read('settings.ini')
-version = config.get("DEFAULT", "version")
-git_url = config.get("DEFAULT", "git_url")
-latest_download = f"{git_url}/releases/download/v{version}/nbdev_squ-{version}-py3-none-any.whl"
-index_text = open("nbs/index.ipynb").read()
-with open("nbs/index.ipynb", "w") as index_nb:
-    index_nb.write(re.sub(f"{git_url}.*?-any.whl", latest_download, index_text))
-
 run(["nbdev_readme"])
 run(["nbdev_docs"])
\ No newline at end of file
diff --git a/nbdev_squ/__init__.py b/nbdev_squ/__init__.py
index f708a9b..7b1e312 100644
--- a/nbdev_squ/__init__.py
+++ b/nbdev_squ/__init__.py
@@ -1 +1 @@
-__version__ = "1.3.2"
+__version__ = "1.3.3"
diff --git a/nbdev_squ/_modidx.py b/nbdev_squ/_modidx.py
index 5814a15..6290f63 100644
--- a/nbdev_squ/_modidx.py
+++ b/nbdev_squ/_modidx.py
@@ -27,4 +27,10 @@
             'nbdev_squ.core.azcli': ('core.html#azcli', 'nbdev_squ/core.py'),
             'nbdev_squ.core.datalake_path': ('core.html#datalake_path', 'nbdev_squ/core.py'),
             'nbdev_squ.core.load_config': ('core.html#load_config', 'nbdev_squ/core.py'),
-            'nbdev_squ.core.login': ('core.html#login', 'nbdev_squ/core.py')}}}
+            'nbdev_squ.core.login': ('core.html#login', 'nbdev_squ/core.py')},
+  'nbdev_squ.legacy': { 'nbdev_squ.legacy.adx_query': ('legacy.html#adx_query', 'nbdev_squ/legacy.py'),
+                        'nbdev_squ.legacy.adxtable2df': ('legacy.html#adxtable2df', 'nbdev_squ/legacy.py'),
+                        'nbdev_squ.legacy.export_jira_issues': ('legacy.html#export_jira_issues', 'nbdev_squ/legacy.py'),
+                        'nbdev_squ.legacy.flatten': ('legacy.html#flatten', 'nbdev_squ/legacy.py'),
+                        'nbdev_squ.legacy.sentinel_beautify_local': ( 'legacy.html#sentinel_beautify_local',
+                                                                      'nbdev_squ/legacy.py')}}}
diff --git a/nbdev_squ/api.py b/nbdev_squ/api.py
index 714956c..37df843 100644
--- a/nbdev_squ/api.py
+++ b/nbdev_squ/api.py
@@ -6,7 +6,7 @@
            'atlaskit_transformer', 'security_incidents', 'security_alerts']
 
 # %% ../nbs/01_api.ipynb 3
-import pandas, json, logging, time, requests, io, pkgutil
+import pandas, json, logging, time, requests, io, pkgutil, httpx_cache
 from .core import *
 from diskcache import memoize_stampede
 from importlib.metadata import version
@@ -15,6 +15,8 @@
 from azure.identity import AzureCliCredential
 from benedict import benedict
 from functools import cached_property
+from atlassian import Jira
+from tenable.io import TenableIO
 
 # %% ../nbs/01_api.ipynb 5
 logger = logging.getLogger(__name__)
@@ -34,7 +36,6 @@ def runzero(self):
         """
         Returns a runzero client
         """
-        import httpx_cache
         return httpx_cache.Client(base_url="https://console.rumble.run/api/v1.0", headers={"Authorization": f"Bearer {self.config.runzero_apitoken}"})
headers={"Authorization": f"Bearer {self.config.runzero_apitoken}"}) @cached_property @@ -50,7 +51,6 @@ def jira(self): """ Returns a jira client """ - from atlassian import Jira return Jira(url=self.config.jira_url, username=self.config.jira_username, password=self.config.jira_password) @cached_property @@ -58,7 +58,6 @@ def tio(self): """ Returns a TenableIO client """ - from tenable.io import TenableIO return TenableIO(self.config.tenable_access_key, self.config.tenable_secret_key) diff --git a/nbdev_squ/legacy.py b/nbdev_squ/legacy.py new file mode 100644 index 0000000..485ce76 --- /dev/null +++ b/nbdev_squ/legacy.py @@ -0,0 +1,316 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_legacy.ipynb. + +# %% auto 0 +__all__ = ['logger', 'adx_query', 'adxtable2df', 'export_jira_issues', 'flatten', 'sentinel_beautify_local'] + +# %% ../nbs/02_legacy.ipynb 3 +from . import api +from markdown import markdown +from diskcache import memoize_stampede +from azure.kusto.data import KustoClient, KustoConnectionStringBuilder +import logging, pandas, json + +# %% ../nbs/02_legacy.ipynb 5 +logger = logging.getLogger(__name__) + +# %% ../nbs/02_legacy.ipynb 6 +@memoize_stampede(api.cache, expire=60 * 5) # cache for 5 mins +def adx_query(kql): + """ + Run a kusto query + + Args: + kql (str or list): kusto query or list of queries + + Returns: + json: query results + """ + if isinstance(kql, list): + kql = [".execute script with (ContinueOnErrors=true) <|"] + kql + kql = "\n".join(kql) + config = api.cache["config"] + cluster, dx_db = config.azure_dataexplorer.rsplit("/", 1) + dx_client = KustoClient(KustoConnectionStringBuilder.with_az_cli_authentication(cluster)) + return dx_client.execute(dx_db, kql.replace("\\", "\\\\")).primary_results[0] + +def adxtable2df(table): + """ + Return a pandas dataframe from an adx table + """ + columns = [col.column_name for col in table.columns] + frame = pandas.DataFrame(table.raw_rows, columns=columns) + return frame + +# %% ../nbs/02_legacy.ipynb 8 +def export_jira_issues(): + """ + Exports all JIRA issues to the data lake. 
+    jira_issues_path = api.datalake_path() / "jira_outputs" / "issues"
+
+    def getissues(start_at, jql):
+        response = api.clients.jira.jql(jql, start=start_at, limit=100)
+        next_start = response["startAt"] + response["maxResults"]
+        total_rows = response["total"]
+        if next_start > total_rows:
+            next_start = total_rows
+        issues = response["issues"]
+        return next_start, total_rows, issues
+
+    def save_date_issues(after_date: pandas.Timestamp, path=jira_issues_path):
+        fromdate = after_date
+        todate = after_date + pandas.to_timedelta("1d")
+        jql = f"updated >= {fromdate.date().isoformat()} and updated < {todate.date().isoformat()} order by key"
+        output = path / f"{fromdate.date().isoformat()}" / "issues.parquet"
+        if output.exists() and fromdate < pandas.Timestamp.now() - pandas.to_timedelta("1d"):
+            # skip previously dumped days except for last day
+            return None
+        start_at, total_rows = 0, -1
+        dataframes = []
+        while start_at != total_rows:
+            start_at, total_rows, issues = getissues(start_at, jql)
+            dataframes.append(pandas.DataFrame(issues))
+            if start_at == 100:
+                logger.info(f"{total_rows} to load")
+        if total_rows > 1:
+            df = pandas.concat(dataframes)
+            df["fields"] = df["fields"].apply(json.dumps)
+            logger.info(f"saving {output}")
+            try:
+                df.to_parquet(output.open("wb"))
+            except Exception as exc:
+                print(exc)
+            return df
+        else:
+            return None
+
+    after = pandas.Timestamp.now() - pandas.to_timedelta("7d")
+    until = pandas.Timestamp.now() + pandas.to_timedelta("1d")
+
+    while after < until:
+        save_date_issues(after)
+        after += pandas.to_timedelta("1d")
+
+# %% ../nbs/02_legacy.ipynb 10
+def flatten(nested_dict, parent_key='', sep='_'):
+    """
+    Flatten a nested dictionary.
+
+    Args:
+        nested_dict (dict): The nested dictionary to flatten.
+        parent_key (str, optional): The parent key for the current level of nesting.
+        sep (str, optional): The separator to use for flattened keys.
+
+    Returns:
+        dict: The flattened dictionary.
+    """
+    flat_dict = {}
+
+    for key, value in nested_dict.items():
+        new_key = f"{parent_key}{sep}{key}" if parent_key else key
+
+        if isinstance(value, dict):
+            flat_dict.update(flatten(value, new_key, sep))
+        else:
+            flat_dict[new_key] = value
+
+    return flat_dict
+
+def sentinel_beautify_local(
+    data: dict,
+    outputformat: str = "jira",
+    default_status: str = "Onboard: MOU (T0)",
+    default_orgid: int = 2,
+):
+    """
+    Takes a SecurityIncident including alerts as JSON and returns
+    markdown, html and detailed JSON representations.
+ """ + for jsonfield in ["Labels", "Owner", "AdditionalData", "Comments"]: + if data.get(jsonfield): + data[jsonfield] = json.loads(data[jsonfield]) + labels = [ + f"SIEM_Severity:{data['Severity']}", + f"SIEM_Status:{data['Status']}", + f"SIEM_Title:{data['Title']}", + ] + labels += [l["labelName"] for l in data["Labels"]] # copy over labels from incident + incident_details = [data["Description"], ""] + + if data.get("Owner"): + owner = None + if data["Owner"].get("email"): + owner = data["Owner"]["email"] + elif data["Owner"].get("userPrincipalName"): + owner = data["Owner"]["userPrincipalName"] + if owner: + labels.append(f"SIEM_Owner:{owner}") + incident_details.append(f"- **Sentinel Incident Owner:** {owner}") + + if data.get("Classification"): + labels.append(f"SIEM_Classification:{data['Classification']}") + incident_details.append(f"- **Alert Classification:** {data['Classification']}") + + if data.get("ClassificationReason"): + labels.append(f"SIEM_ClassificationReason:{data['ClassificationReason']}") + incident_details.append( + f"- **Alert Classification Reason:** {data['ClassificationReason']}" + ) + + if data.get("ProviderName"): + labels.append(f"SIEM_ProviderName:{data['ProviderName']}") + incident_details.append(f"- **Provider Name:** {data['ProviderName']}") + + if data.get("AdditionalData"): + if data["AdditionalData"].get("alertProductNames"): + product_names = ",".join(data["AdditionalData"]["alertProductNames"]) + labels.append(f"SIEM_alertProductNames:{product_names}") + incident_details.append(f"- **Product Names:** {product_names}") + if data["AdditionalData"].get("tactics"): + tactics = ",".join(data["AdditionalData"]["tactics"]) + labels.append(f"SIEM_tactics:{tactics}") + incident_details.append( + f"- **[MITRE ATT&CK Tactics](https://attack.mitre.org/tactics/):** {tactics}" + ) + if data["AdditionalData"].get("techniques"): + techniques = ",".join(data["AdditionalData"]["techniques"]) + labels.append(f"SIEM_techniques:{techniques}") + incident_details.append( + "- **[MITRE ATT&CK Techniques](https://attack.mitre.org/techniques/):**" + f" {techniques}" + ) + + comments = [] + if data.get("Comments"): + if len(data["Comments"]) > 0: + comments += ["", "## Comments"] + for comment in data["Comments"]: + comments += comment["message"].split("\n") + comments += [""] + + alert_details = [] + observables = [] + entity_type_value_mappings = { + "host": "{HostName}", + "account": "{Name}", + "process": "{CommandLine}", + "file": "{Name}", + "ip": "{Address}", + "url": "{Url}", + "dns": "{DomainName}", + "registry-key": "{Hive}{Key}", + "filehash": "{Algorithm}{Value}", + } + + class Default(dict): + """ + Default dict that returns the key if the key is not found + Args: + dict + """ + + def __missing__(self, key): + return key + + for alert in data["AlertData"][:10]: # Assumes alertdata is newest to oldest + if not alert_details: + alert_details += [ + "", + "## Alert Details", + ( + "The last day of activity (up to 10 alerts) is summarised below from" + " newest to oldest." 
+ ), + ] + alert_details.append( + f"### [{alert['AlertName']} (Severity:{alert['AlertSeverity']}) - " + + f"TimeGenerated {alert['TimeGenerated']}]({alert['AlertLink']})" + ) + alert_details.append(alert["Description"]) + for key in [ + "RemediationSteps", + "ExtendedProperties", + "Entities", + ]: # entities last as may get truncated + if alert.get(key): + if isinstance(alert[key], str) and alert[key][0] in ["{", "["]: + alert[key] = json.loads(alert[key]) + if key == "Entities": # add the entity to our list of observables + for entity in alert[key]: + observable = {"value": None} + if "Type" in entity: + observable = { + "type": entity["Type"], + "value": entity_type_value_mappings.get( + entity["Type"], "" + ).format_map(Default(entity)), + } + if not observable["value"]: # dump whole dict as string if no mapping found + observable["value"] = repr(entity) + observables.append(observable) + if alert[key] and isinstance(alert[key], list) and isinstance(alert[key][0], dict): + # if list of dicts, make a table + for index, entry in enumerate( + [flatten(item) for item in alert[key] if len(item.keys()) > 1] + ): + alert_details += ["", f"#### {key}.{index}"] + for entrykey, value in entry.items(): + if value: + alert_details.append(f"- **{entrykey}:** {value}") + elif isinstance(alert[key], dict): # if dict display as list + alert_details += ["", f"#### {key}"] + for entrykey, value in alert[key].items(): + if value and len(value) < 200: + alert_details.append(f"- **{entrykey}:** {value}") + elif value: # break out long blocks + alert_details += [f"- **{entrykey}:**", "", "```", value, "```", ""] + else: # otherwise just add as separate lines + alert_details += ["", f"#### {key}"] + [item for item in alert[key]] + + title = ( + f"SIEM Detection #{data['IncidentNumber']} Sev:{data['Severity']} -" + f" {data['Title']} (Status:{data['Status']})" + ) + mdtext = ( + [ + f"# {title}", + "", + f"## [SecurityIncident #{data['IncidentNumber']} Details]({data['IncidentUrl']})", + "", + ] + + incident_details + + comments + + alert_details + ) + mdtext = "\n".join([str(line) for line in mdtext]) + content = markdown(mdtext, extensions=["tables"]) + # remove special chars and deduplicate labels + labels = set("".join(c for c in label if c.isalnum() or c in ".:_") for label in labels) + + response = { + "subject": title, + "labels": list(labels), + "observables": [dict(ts) for ts in set(tuple(i.items()) for i in observables)], + "sentinel_data": data, + } + workspaces_df = api.list_workspaces() + customer = ( + workspaces_df[workspaces_df["customerId"] == data["TenantId"]].to_dict("records") + ) + if len(customer) > 0: + customer = customer[0] + else: + customer = {} + # Grab wiki format for jira and truncate to 32767 chars + response.update( + { + "secops_status": customer.get("SecOps Status") or default_status, + "jira_orgid": customer.get("JiraOrgId") or default_orgid, + "customer": customer, + "wikimarkup": ( + api.atlaskit_transformer(mdtext)[:32760] + ), + } + ) + return response + diff --git a/nbs/01_api.ipynb b/nbs/01_api.ipynb index 66a98b8..d94f71e 100644 --- a/nbs/01_api.ipynb +++ b/nbs/01_api.ipynb @@ -35,7 +35,7 @@ "outputs": [], "source": [ "#| export\n", - "import pandas, json, logging, time, requests, io, pkgutil\n", + "import pandas, json, logging, time, requests, io, pkgutil, httpx_cache\n", "from nbdev_squ.core import *\n", "from diskcache import memoize_stampede\n", "from importlib.metadata import version\n", @@ -43,7 +43,9 @@ "from azure.monitor.query import LogsQueryClient, 
     "from azure.identity import AzureCliCredential\n",
     "from benedict import benedict\n",
-    "from functools import cached_property"
+    "from functools import cached_property\n",
+    "from atlassian import Jira\n",
+    "from tenable.io import TenableIO"
    ]
   },
   {
@@ -95,7 +97,6 @@
     "    \"\"\"\n",
     "    Returns a runzero client\n",
     "    \"\"\"\n",
-    "    import httpx_cache\n",
     "    return httpx_cache.Client(base_url=\"https://console.rumble.run/api/v1.0\", headers={\"Authorization\": f\"Bearer {self.config.runzero_apitoken}\"})\n",
     "\n",
     "    @cached_property\n",
@@ -111,7 +112,6 @@
     "    \"\"\"\n",
     "    Returns a jira client\n",
     "    \"\"\"\n",
-    "    from atlassian import Jira\n",
     "    return Jira(url=self.config.jira_url, username=self.config.jira_username, password=self.config.jira_password)\n",
     "\n",
     "    @cached_property\n",
@@ -119,7 +119,6 @@
     "    \"\"\"\n",
     "    Returns a TenableIO client\n",
     "    \"\"\"\n",
-    "    from tenable.io import TenableIO\n",
     "    return TenableIO(self.config.tenable_access_key, self.config.tenable_secret_key)\n",
     "\n",
     "\n",
diff --git a/nbs/02_legacy.ipynb b/nbs/02_legacy.ipynb
new file mode 100644
index 0000000..9ca0a8c
--- /dev/null
+++ b/nbs/02_legacy.ipynb
@@ -0,0 +1,460 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ac93b06e-85ef-4520-991e-c8066e45533b",
+   "metadata": {},
+   "source": [
+    "# legacy\n",
+    "\n",
+    "These are some legacy utilities and integrations that are no longer actively maintained."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96712b92-6b40-4b40-b854-4711d29f7325",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| default_exp legacy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6d25975a-a4bd-4d8b-bf0c-e4d4d08765ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "from nbdev.showdoc import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93d53d6d-7265-4ea3-a696-159c88e72b33",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "from nbdev_squ import api\n",
+    "from markdown import markdown\n",
+    "from diskcache import memoize_stampede\n",
+    "from azure.kusto.data import KustoClient, KustoConnectionStringBuilder\n",
+    "import logging, pandas, json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52ef431c-1423-4095-8b6f-cddf8f5323eb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logging.basicConfig(level=logging.INFO)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1de0d5cf-eba8-42df-ba90-fd357505f054",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "logger = logging.getLogger(__name__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "df032ed2-0117-4b44-8d39-058ea6989f21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| exports\n",
+    "@memoize_stampede(api.cache, expire=60 * 5) # cache for 5 mins\n",
+    "def adx_query(kql):\n",
+    "    \"\"\"\n",
+    "    Run a Kusto query against Azure Data Explorer\n",
+    "\n",
+    "    Args:\n",
+    "        kql (str or list): Kusto query or list of queries\n",
+    "\n",
+    "    Returns:\n",
+    "        KustoResultTable: query results\n",
+    "    \"\"\"\n",
+    "    if isinstance(kql, list):\n",
+    "        kql = [\".execute script with (ContinueOnErrors=true) <|\"] + kql\n",
+    "        kql = \"\\n\".join(kql)\n",
+    "    config = api.cache[\"config\"]\n",
+    "    cluster, dx_db = config.azure_dataexplorer.rsplit(\"/\", 1)\n",
+    "    dx_client = KustoClient(KustoConnectionStringBuilder.with_az_cli_authentication(cluster))\n",
+    "    return dx_client.execute(dx_db, kql.replace(\"\\\\\", \"\\\\\\\\\")).primary_results[0]\n",
\"\\\\\\\\\")).primary_results[0]\n", + "\n", + "def adxtable2df(table):\n", + " \"\"\"\n", + " Return a pandas dataframe from an adx table\n", + " \"\"\"\n", + " columns = [col.column_name for col in table.columns]\n", + " frame = pandas.DataFrame(table.raw_rows, columns=columns)\n", + " return frame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6554837e-f47c-4e8c-9cb0-9aacc1e427b8", + "metadata": {}, + "outputs": [], + "source": [ + "adxtable2df(adx_query(\"SecurityAlert | take 10\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86df9e5a-c337-49f2-a959-f850649a6b57", + "metadata": {}, + "outputs": [], + "source": [ + "#| exports\n", + "def export_jira_issues():\n", + " \"\"\"\n", + " Exports all JIRA issues to the data lake.\n", + " \"\"\"\n", + " jira_issues_path = api.datalake_path() / \"jira_outputs\" / \"issues\"\n", + "\n", + " def getissues(start_at, jql):\n", + " response = api.clients.jira.jql(jql, start=start_at, limit=100)\n", + " next_start = response[\"startAt\"] + response[\"maxResults\"]\n", + " total_rows = response[\"total\"]\n", + " if next_start > total_rows:\n", + " next_start = total_rows\n", + " issues = response[\"issues\"]\n", + " return next_start, total_rows, issues\n", + "\n", + " def save_date_issues(after_date: pandas.Timestamp, path=jira_issues_path):\n", + " fromdate = after_date\n", + " todate = after_date + pandas.to_timedelta(\"1d\")\n", + " jql = f\"updated >= {fromdate.date().isoformat()} and updated < {todate.date().isoformat()} order by key\"\n", + " output = path / f\"{fromdate.date().isoformat()}\" / \"issues.parquet\"\n", + " if output.exists() and fromdate < pandas.Timestamp.now() - pandas.to_timedelta(\"1d\"):\n", + " # skip previously dumped days except for last day\n", + " return None\n", + " start_at, total_rows = 0, -1\n", + " dataframes = []\n", + " while start_at != total_rows:\n", + " start_at, total_rows, issues = getissues(start_at, jql)\n", + " dataframes.append(pandas.DataFrame(issues))\n", + " if start_at == 100:\n", + " logger.info(f\"{total_rows} to load\")\n", + " if total_rows > 1:\n", + " df = pandas.concat(dataframes)\n", + " df[\"fields\"] = df[\"fields\"].apply(json.dumps)\n", + " logger.info(f\"saving {output}\")\n", + " try:\n", + " df.to_parquet(output.open(\"wb\"))\n", + " except Exception as exc:\n", + " print(exc)\n", + " return df\n", + " else:\n", + " return None\n", + "\n", + " after = pandas.Timestamp.now() - pandas.to_timedelta(\"7d\")\n", + " until = pandas.Timestamp.now() + pandas.to_timedelta(\"1d\")\n", + "\n", + " while after < until:\n", + " save_date_issues(after)\n", + " after += pandas.to_timedelta(\"1d\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4a7cd85-b3d6-40b9-a505-5228cee0a22d", + "metadata": {}, + "outputs": [], + "source": [ + "export_jira_issues()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5e09c76-fc69-45c8-a05d-369596e7abe3", + "metadata": {}, + "outputs": [], + "source": [ + "#| exports\n", + "def flatten(nested_dict, parent_key='', sep='_'):\n", + " \"\"\"\n", + " Flatten a nested dictionary.\n", + " \n", + " Args:\n", + " nested_dict (dict): The nested dictionary to flatten.\n", + " parent_key (str, optional): The parent key for the current level of nesting.\n", + " sep (str, optional): The separator to use for flattened keys.\n", + " \n", + " Returns:\n", + " dict: The flattened dictionary.\n", + " \"\"\"\n", + " flat_dict = {}\n", + " \n", + " for key, value in 
+    "        new_key = f\"{parent_key}{sep}{key}\" if parent_key else key\n",
+    "\n",
+    "        if isinstance(value, dict):\n",
+    "            flat_dict.update(flatten(value, new_key, sep))\n",
+    "        else:\n",
+    "            flat_dict[new_key] = value\n",
+    "\n",
+    "    return flat_dict\n",
+    "\n",
+    "def sentinel_beautify_local(\n",
+    "    data: dict,\n",
+    "    outputformat: str = \"jira\",\n",
+    "    default_status: str = \"Onboard: MOU (T0)\",\n",
+    "    default_orgid: int = 2,\n",
+    "):\n",
+    "    \"\"\"\n",
+    "    Takes a SecurityIncident including alerts as JSON and returns\n",
+    "    markdown, html and detailed JSON representations.\n",
+    "    \"\"\"\n",
+    "    for jsonfield in [\"Labels\", \"Owner\", \"AdditionalData\", \"Comments\"]:\n",
+    "        if data.get(jsonfield):\n",
+    "            data[jsonfield] = json.loads(data[jsonfield])\n",
+    "    labels = [\n",
+    "        f\"SIEM_Severity:{data['Severity']}\",\n",
+    "        f\"SIEM_Status:{data['Status']}\",\n",
+    "        f\"SIEM_Title:{data['Title']}\",\n",
+    "    ]\n",
+    "    labels += [l[\"labelName\"] for l in data[\"Labels\"]] # copy over labels from incident\n",
+    "    incident_details = [data[\"Description\"], \"\"]\n",
+    "\n",
+    "    if data.get(\"Owner\"):\n",
+    "        owner = None\n",
+    "        if data[\"Owner\"].get(\"email\"):\n",
+    "            owner = data[\"Owner\"][\"email\"]\n",
+    "        elif data[\"Owner\"].get(\"userPrincipalName\"):\n",
+    "            owner = data[\"Owner\"][\"userPrincipalName\"]\n",
+    "        if owner:\n",
+    "            labels.append(f\"SIEM_Owner:{owner}\")\n",
+    "            incident_details.append(f\"- **Sentinel Incident Owner:** {owner}\")\n",
+    "\n",
+    "    if data.get(\"Classification\"):\n",
+    "        labels.append(f\"SIEM_Classification:{data['Classification']}\")\n",
+    "        incident_details.append(f\"- **Alert Classification:** {data['Classification']}\")\n",
+    "\n",
+    "    if data.get(\"ClassificationReason\"):\n",
+    "        labels.append(f\"SIEM_ClassificationReason:{data['ClassificationReason']}\")\n",
+    "        incident_details.append(\n",
+    "            f\"- **Alert Classification Reason:** {data['ClassificationReason']}\"\n",
+    "        )\n",
+    "\n",
+    "    if data.get(\"ProviderName\"):\n",
+    "        labels.append(f\"SIEM_ProviderName:{data['ProviderName']}\")\n",
+    "        incident_details.append(f\"- **Provider Name:** {data['ProviderName']}\")\n",
+    "\n",
+    "    if data.get(\"AdditionalData\"):\n",
+    "        if data[\"AdditionalData\"].get(\"alertProductNames\"):\n",
+    "            product_names = \",\".join(data[\"AdditionalData\"][\"alertProductNames\"])\n",
+    "            labels.append(f\"SIEM_alertProductNames:{product_names}\")\n",
+    "            incident_details.append(f\"- **Product Names:** {product_names}\")\n",
+    "        if data[\"AdditionalData\"].get(\"tactics\"):\n",
+    "            tactics = \",\".join(data[\"AdditionalData\"][\"tactics\"])\n",
+    "            labels.append(f\"SIEM_tactics:{tactics}\")\n",
+    "            incident_details.append(\n",
+    "                f\"- **[MITRE ATT&CK Tactics](https://attack.mitre.org/tactics/):** {tactics}\"\n",
+    "            )\n",
+    "        if data[\"AdditionalData\"].get(\"techniques\"):\n",
+    "            techniques = \",\".join(data[\"AdditionalData\"][\"techniques\"])\n",
+    "            labels.append(f\"SIEM_techniques:{techniques}\")\n",
+    "            incident_details.append(\n",
+    "                \"- **[MITRE ATT&CK Techniques](https://attack.mitre.org/techniques/):**\"\n",
+    "                f\" {techniques}\"\n",
+    "            )\n",
+    "\n",
+    "    comments = []\n",
+    "    if data.get(\"Comments\"):\n",
+    "        if len(data[\"Comments\"]) > 0:\n",
+    "            comments += [\"\", \"## Comments\"]\n",
+    "            for comment in data[\"Comments\"]:\n",
+    "                comments += comment[\"message\"].split(\"\\n\")\n",
+    "            comments += [\"\"]\n",
+    "\n",
+    "    alert_details = []\n",
+    "    observables = []\n",
+    "    entity_type_value_mappings = {\n",
\"host\": \"{HostName}\",\n", + " \"account\": \"{Name}\",\n", + " \"process\": \"{CommandLine}\",\n", + " \"file\": \"{Name}\",\n", + " \"ip\": \"{Address}\",\n", + " \"url\": \"{Url}\",\n", + " \"dns\": \"{DomainName}\",\n", + " \"registry-key\": \"{Hive}{Key}\",\n", + " \"filehash\": \"{Algorithm}{Value}\",\n", + " }\n", + "\n", + " class Default(dict):\n", + " \"\"\"\n", + " Default dict that returns the key if the key is not found\n", + " Args:\n", + " dict\n", + " \"\"\"\n", + "\n", + " def __missing__(self, key):\n", + " return key\n", + "\n", + " for alert in data[\"AlertData\"][:10]: # Assumes alertdata is newest to oldest\n", + " if not alert_details:\n", + " alert_details += [\n", + " \"\",\n", + " \"## Alert Details\",\n", + " (\n", + " \"The last day of activity (up to 10 alerts) is summarised below from\"\n", + " \" newest to oldest.\"\n", + " ),\n", + " ]\n", + " alert_details.append(\n", + " f\"### [{alert['AlertName']} (Severity:{alert['AlertSeverity']}) - \"\n", + " + f\"TimeGenerated {alert['TimeGenerated']}]({alert['AlertLink']})\"\n", + " )\n", + " alert_details.append(alert[\"Description\"])\n", + " for key in [\n", + " \"RemediationSteps\",\n", + " \"ExtendedProperties\",\n", + " \"Entities\",\n", + " ]: # entities last as may get truncated\n", + " if alert.get(key):\n", + " if isinstance(alert[key], str) and alert[key][0] in [\"{\", \"[\"]:\n", + " alert[key] = json.loads(alert[key])\n", + " if key == \"Entities\": # add the entity to our list of observables\n", + " for entity in alert[key]:\n", + " observable = {\"value\": None}\n", + " if \"Type\" in entity:\n", + " observable = {\n", + " \"type\": entity[\"Type\"],\n", + " \"value\": entity_type_value_mappings.get(\n", + " entity[\"Type\"], \"\"\n", + " ).format_map(Default(entity)),\n", + " }\n", + " if not observable[\"value\"]: # dump whole dict as string if no mapping found\n", + " observable[\"value\"] = repr(entity)\n", + " observables.append(observable)\n", + " if alert[key] and isinstance(alert[key], list) and isinstance(alert[key][0], dict):\n", + " # if list of dicts, make a table\n", + " for index, entry in enumerate(\n", + " [flatten(item) for item in alert[key] if len(item.keys()) > 1]\n", + " ):\n", + " alert_details += [\"\", f\"#### {key}.{index}\"]\n", + " for entrykey, value in entry.items():\n", + " if value:\n", + " alert_details.append(f\"- **{entrykey}:** {value}\")\n", + " elif isinstance(alert[key], dict): # if dict display as list\n", + " alert_details += [\"\", f\"#### {key}\"]\n", + " for entrykey, value in alert[key].items():\n", + " if value and len(value) < 200:\n", + " alert_details.append(f\"- **{entrykey}:** {value}\")\n", + " elif value: # break out long blocks\n", + " alert_details += [f\"- **{entrykey}:**\", \"\", \"```\", value, \"```\", \"\"]\n", + " else: # otherwise just add as separate lines\n", + " alert_details += [\"\", f\"#### {key}\"] + [item for item in alert[key]]\n", + "\n", + " title = (\n", + " f\"SIEM Detection #{data['IncidentNumber']} Sev:{data['Severity']} -\"\n", + " f\" {data['Title']} (Status:{data['Status']})\"\n", + " )\n", + " mdtext = (\n", + " [\n", + " f\"# {title}\",\n", + " \"\",\n", + " f\"## [SecurityIncident #{data['IncidentNumber']} Details]({data['IncidentUrl']})\",\n", + " \"\",\n", + " ]\n", + " + incident_details\n", + " + comments\n", + " + alert_details\n", + " )\n", + " mdtext = \"\\n\".join([str(line) for line in mdtext])\n", + " content = markdown(mdtext, extensions=[\"tables\"])\n", + " # remove special chars and deduplicate 
labels\n",
+    "    labels = set(\"\".join(c for c in label if c.isalnum() or c in \".:_\") for label in labels)\n",
+    "\n",
+    "    response = {\n",
+    "        \"subject\": title,\n",
+    "        \"labels\": list(labels),\n",
+    "        \"observables\": [dict(ts) for ts in set(tuple(i.items()) for i in observables)],\n",
+    "        \"sentinel_data\": data,\n",
+    "    }\n",
+    "    workspaces_df = api.list_workspaces()\n",
+    "    customer = (\n",
+    "        workspaces_df[workspaces_df[\"customerId\"] == data[\"TenantId\"]].to_dict(\"records\")\n",
+    "    )\n",
+    "    if len(customer) > 0:\n",
+    "        customer = customer[0]\n",
+    "    else:\n",
+    "        customer = {}\n",
+    "    # Grab wiki format for jira and truncate to fit jira's 32767 char limit\n",
+    "    response.update(\n",
+    "        {\n",
+    "            \"secops_status\": customer.get(\"SecOps Status\") or default_status,\n",
+    "            \"jira_orgid\": customer.get(\"JiraOrgId\") or default_orgid,\n",
+    "            \"customer\": customer,\n",
+    "            \"wikimarkup\": (\n",
+    "                api.atlaskit_transformer(mdtext)[:32760]\n",
+    "            ),\n",
+    "        }\n",
+    "    )\n",
+    "    return response\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e2c1cb2-5d3f-4c33-af56-3a4304d197ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# grab latest incident with alerts\n",
+    "incident = api.security_incidents().dropna(subset=[\"AlertIds\"]).iloc[0]\n",
+    "df = api.security_alerts()\n",
+    "df = df[df[\"TenantId\"] == incident[\"TenantId\"]]\n",
+    "alertids = json.loads(incident[\"AlertIds\"])\n",
+    "# extend incident with alert info\n",
+    "incident[\"AlertData\"] = df[df[\"SystemAlertId\"].isin(alertids)].copy(deep=True).to_dict(orient=\"records\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ad1e416d-bbf0-4512-bcc6-bcc3cd02831b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert to a jira friendly format\n",
+    "sentinel_beautify_local(incident.to_dict())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef560e4c-4b15-4d91-b09b-31cb1e12f8bd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/nbs/index.ipynb b/nbs/index.ipynb
index 9269e9d..03975a6 100644
--- a/nbs/index.ipynb
+++ b/nbs/index.ipynb
@@ -28,13 +28,13 @@
     "Below is how to install in a plain python 3.11+ environment\n",
     "\n",
     "```sh\n",
-    "https://github.com/wagov/nbdev-squ/releases/download/v1.3.2/nbdev_squ-1.3.2-py3-none-any.whl\n",
+    "pip install nbdev-squ\n",
     "```\n",
     "\n",
     "The installation can also be run in a notebook (we tend to use [JupyterLab Desktop](https://github.com/jupyterlab/jupyterlab-desktop) for local dev). The `SQU_CONFIG` env var indicates to nbdev_squ it should load the json secret *squconfig-`my_keyvault_tenantid`* from the `my_keyvault_name` keyvault.\n",
     "\n",
     "```python\n",
-    "%pip install https://github.com/wagov/nbdev-squ/releases/download/v1.3.2/nbdev_squ-1.3.2-py3-none-any.whl\n",
+    "%pip install nbdev-squ\n",
     "import os; os.environ[\"SQU_CONFIG\"] = \"{{ my_keyvault_name }}/{{ my_keyvault_tenantid }}\" \n",
     "\n",
     "from nbdev_squ import api\n",
diff --git a/nbs/sidebar.yml b/nbs/sidebar.yml
index 59764f5..e1c5bb0 100644
--- a/nbs/sidebar.yml
+++ b/nbs/sidebar.yml
@@ -4,3 +4,4 @@ website:
     - index.ipynb
     - 00_core.ipynb
     - 01_api.ipynb
+    - 02_legacy.ipynb
diff --git a/settings.ini b/settings.ini
index 89216c3..e638fa3 100644
--- a/settings.ini
+++ b/settings.ini
@@ -1,7 +1,7 @@
 [DEFAULT]
 repo = nbdev-squ
 lib_name = nbdev-squ
-version = 1.3.2
+version = 1.3.3
 min_python = 3.10
 license = apache2
 black_formatting = False
@@ -27,7 +27,7 @@ keywords = nbdev jupyter notebook python
 language = English
 status = 3
 user = adonm
-requirements = tenacity platformdirs universal-pathlib adlfs diskcache azure-cli>=2.58 azure-monitor-query azure-kusto-data atlassian-python-api abuseipdb_wrapper>=0.1.9 pysigma pandas pyarrow dask python-benedict pytenable httpx_cache msticpy[azsentinel]
+requirements = tenacity platformdirs universal-pathlib adlfs diskcache azure-cli>=2.58 azure-monitor-query azure-kusto-data atlassian-python-api abuseipdb_wrapper>=0.2 pysigma pandas pyarrow dask python-benedict markdown pytenable httpx_cache msticpy[azsentinel]
 readme_nb = index.ipynb
 allowed_metadata_keys = 
 allowed_cell_metadata_keys = 
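
For quick verification of this release, here is a minimal end-to-end sketch of the new `nbdev_squ.legacy` module. It simply stitches together the README install snippet and the demo cells from `nbs/02_legacy.ipynb` above; it assumes `pip install nbdev-squ` has been run, `az login` has succeeded, and that the `{{ ... }}` placeholders are replaced with a real keyvault name and tenant id.

```python
import os, json

# point nbdev_squ at the keyvault holding the squconfig secret
os.environ["SQU_CONFIG"] = "{{ my_keyvault_name }}/{{ my_keyvault_tenantid }}"

from nbdev_squ import api
from nbdev_squ.legacy import adx_query, adxtable2df, sentinel_beautify_local

# run a Kusto query against Azure Data Explorer and convert the result table
alerts_df = adxtable2df(adx_query("SecurityAlert | take 10"))

# grab the latest incident with alerts and attach its alert records
incident = api.security_incidents().dropna(subset=["AlertIds"]).iloc[0]
df = api.security_alerts()
df = df[df["TenantId"] == incident["TenantId"]]
alertids = json.loads(incident["AlertIds"])
incident["AlertData"] = df[df["SystemAlertId"].isin(alertids)].copy(deep=True).to_dict(orient="records")

# convert to a jira friendly format (subject, labels, observables, wikimarkup)
print(sentinel_beautify_local(incident.to_dict())["subject"])
```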
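
And since `sentinel_beautify_local` relies on `flatten` recursing into nested alert entries, a quick sanity check of its behaviour; the nested dict below is purely illustrative:

```python
from nbdev_squ.legacy import flatten

# nested keys are joined with the separator; top-level keys pass through unchanged
nested = {"Owner": {"email": "analyst@example.com"}, "Severity": "High"}
assert flatten(nested) == {"Owner_email": "analyst@example.com", "Severity": "High"}
```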