cache safely tweaks
Adjusted much of the memoization to use a safer caching approach
adonm committed Apr 16, 2024
1 parent cd88d05 commit 68c1a03
Showing 11 changed files with 57 additions and 37 deletions.
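
The recurring pattern in this commit: each function whose result was previously memoized directly is split into a memoized *_safe variant that returns only simple, easily serialised values (a path to a persisted parquet file, a tuple of strings, or a list of dicts) and a thin, un-cached wrapper that rebuilds the DataFrame or UPath on every call. Below is a minimal sketch of that split, assuming the same diskcache memoize_stampede decorator used in nbdev_squ.core; fetch_rows_safe and fetch_rows are illustrative names, not part of the package.

import pandas
from diskcache import Cache, memoize_stampede

cache = Cache("/tmp/squ-demo")  # stand-in for nbdev_squ.core.cache

@memoize_stampede(cache, expire=60 * 60 * 3)  # cached value stays simple and picklable
def fetch_rows_safe():
    # return plain Python structures (a list of dicts), not a DataFrame
    return [{"alias": "agency1", "customerId": "1111"},
            {"alias": "agency2", "customerId": "2222"}]

def fetch_rows():
    # un-cached wrapper: rebuild the rich object from the cached primitives
    return pandas.DataFrame(fetch_rows_safe())
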
3 changes: 2 additions & 1 deletion .gitignore
@@ -150,4 +150,5 @@ checklink/cookies.txt
.pkg

# Quarto
.quarto
.quarto
/.jupyter
4 changes: 3 additions & 1 deletion build.sh
@@ -3,4 +3,6 @@
npm ci
npm run build
pip install build==1.2.1
python -m build
python -m build
# To release to pypi run below after a build
# twine upload --skip-existing dist/*
2 changes: 1 addition & 1 deletion nbdev_squ/__init__.py
@@ -1 +1 @@
__version__ = "1.3.4"
__version__ = "1.3.5"
3 changes: 3 additions & 0 deletions nbdev_squ/_modidx.py
@@ -16,8 +16,10 @@
'nbdev_squ.api.finalise_query': ('api.html#finalise_query', 'nbdev_squ/api.py'),
'nbdev_squ.api.hunt': ('api.html#hunt', 'nbdev_squ/api.py'),
'nbdev_squ.api.list_securityinsights': ('api.html#list_securityinsights', 'nbdev_squ/api.py'),
'nbdev_squ.api.list_securityinsights_safe': ('api.html#list_securityinsights_safe', 'nbdev_squ/api.py'),
'nbdev_squ.api.list_subscriptions': ('api.html#list_subscriptions', 'nbdev_squ/api.py'),
'nbdev_squ.api.list_workspaces': ('api.html#list_workspaces', 'nbdev_squ/api.py'),
'nbdev_squ.api.list_workspaces_safe': ('api.html#list_workspaces_safe', 'nbdev_squ/api.py'),
'nbdev_squ.api.loganalytics_query': ('api.html#loganalytics_query', 'nbdev_squ/api.py'),
'nbdev_squ.api.query_all': ('api.html#query_all', 'nbdev_squ/api.py'),
'nbdev_squ.api.security_alerts': ('api.html#security_alerts', 'nbdev_squ/api.py'),
@@ -26,6 +28,7 @@
'nbdev_squ.core': { 'nbdev_squ.core._cli': ('core.html#_cli', 'nbdev_squ/core.py'),
'nbdev_squ.core.azcli': ('core.html#azcli', 'nbdev_squ/core.py'),
'nbdev_squ.core.datalake_path': ('core.html#datalake_path', 'nbdev_squ/core.py'),
'nbdev_squ.core.datalake_path_safe': ('core.html#datalake_path_safe', 'nbdev_squ/core.py'),
'nbdev_squ.core.load_config': ('core.html#load_config', 'nbdev_squ/core.py'),
'nbdev_squ.core.login': ('core.html#login', 'nbdev_squ/core.py')},
'nbdev_squ.legacy': { 'nbdev_squ.legacy.adx_query': ('legacy.html#adx_query', 'nbdev_squ/legacy.py'),
26 changes: 17 additions & 9 deletions nbdev_squ/api.py
@@ -1,9 +1,9 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_api.ipynb.

# %% auto 0
__all__ = ['logger', 'clients', 'columns_of_interest', 'columns', 'Clients', 'list_workspaces', 'list_subscriptions',
'list_securityinsights', 'chunks', 'loganalytics_query', 'query_all', 'finalise_query', 'hunt',
'atlaskit_transformer', 'security_incidents', 'security_alerts']
__all__ = ['logger', 'clients', 'columns_of_interest', 'columns', 'Clients', 'list_workspaces_safe', 'list_workspaces',
'list_subscriptions', 'list_securityinsights_safe', 'list_securityinsights', 'chunks', 'loganalytics_query',
'query_all', 'finalise_query', 'hunt', 'atlaskit_transformer', 'security_incidents', 'security_alerts']

# %% ../nbs/01_api.ipynb 3
import pandas, json, logging, time, requests, io, pkgutil, httpx_cache
@@ -65,13 +65,20 @@ def tio(self):

# %% ../nbs/01_api.ipynb 13
@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours
def list_workspaces(fmt: str = "df", # df, csv, json, list
def list_workspaces_safe(fmt: str = "df", # df, csv, json, list
agency: str = "ALL"): # Agency alias or ALL
path = datalake_path()
df = pandas.read_csv((path / "notebooks/lists/SentinelWorkspaces.csv").open())
df = df.join(pandas.read_csv((path / "notebooks/lists/SecOps Groups.csv").open()).set_index("Alias"), on="SecOps Group", rsuffix="_secops")
df = df.rename(columns={"SecOps Group": "alias", "Domains and IPs": "domains"})
df = df.dropna(subset=["customerId"]).sort_values(by="alias").convert_dtypes().reset_index()
persisted = f"{dirs.user_cache_dir}/list_workspaces.parquet"
df.to_parquet(persisted)
return persisted

def list_workspaces(fmt: str = "df", # df, csv, json, list
agency: str = "ALL"): # Agency alias or ALL
df = pandas.read_parquet(list_workspaces_safe(fmt, agency))
if agency != "ALL":
df = df[df["alias"] == agency]
if fmt == "df":
@@ -86,13 +93,12 @@ def list_workspaces(fmt: str = "df", # df, csv, json, list
raise ValueError("Invalid format")

# %% ../nbs/01_api.ipynb 17
@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours
def list_subscriptions():
return pandas.DataFrame(azcli(["account", "list"]))["id"].unique()

@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours
def list_securityinsights():
return pandas.DataFrame(azcli([
def list_securityinsights_safe():
return azcli([
"graph", "query", "--first", "1000", "-q",
"""
resources
@@ -104,14 +110,16 @@
on wlid
| extend customerId = properties.customerId
"""
])["data"])
])["data"]

def list_securityinsights():
return pandas.DataFrame(list_securityinsights_safe())

def chunks(items, size):
# Yield successive `size` chunks from `items`
for i in range(0, len(items), size):
yield items[i:i + size]

@memoize_stampede(cache, expire=60 * 5) # cache for 5 mins
def loganalytics_query(queries: list[str], timespan=pandas.Timedelta("14d"), batch_size=190, batch_delay=32, sentinel_workspaces = None):
"""
Run queries across all workspaces, in batches of `batch_size` with a minimum delay of `batch_delay` between batches.
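
Hypothetical usage of the reworked api functions above (assuming the package is installed and its datalake config is reachable): list_workspaces_safe is the memoized step and now returns only the path of a parquet file written under dirs.user_cache_dir, while list_workspaces reloads the frame from that file before filtering and formatting.

from nbdev_squ import api

parquet_path = api.list_workspaces_safe()   # memoized: returns a plain path string
df = api.list_workspaces(fmt="df")          # wrapper: reads the parquet, then filters/formats
insights = api.list_securityinsights()      # DataFrame rebuilt from the cached list of dicts
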
12 changes: 8 additions & 4 deletions nbdev_squ/core.py
@@ -1,7 +1,7 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_core.ipynb.

# %% auto 0
__all__ = ['logger', 'dirs', 'cache', 'retryer', 'load_config', 'login', 'azcli', 'datalake_path']
__all__ = ['logger', 'dirs', 'cache', 'retryer', 'load_config', 'login', 'azcli', 'datalake_path_safe', 'datalake_path']

# %% ../nbs/00_core.ipynb 3
import logging, subprocess, os, sys, pandas
@@ -102,14 +102,18 @@ def azcli(basecmd: list[str]):

# %% ../nbs/00_core.ipynb 16
@memoize_stampede(cache, expire=60 * 60 * 24)
def datalake_path(expiry_days: int=3, # Number of days until the SAS token expires
permissions: str="racwdlt" # Permissions to grant on the SAS token
):
def datalake_path_safe(expiry_days, permissions):
if not cache.get("logged_in"): # Have to login to grab keyvault config
login()
expiry = pandas.Timestamp("now") + pandas.Timedelta(days=expiry_days)
account = cache["config"]["datalake_account"].split(".")[0] # Grab the account name, not the full FQDN
container = cache['config']['datalake_container']
sas = azcli(["storage", "container", "generate-sas", "--auth-mode", "login", "--as-user",
"--account-name", account, "--name", container, "--permissions", permissions, "--expiry", str(expiry.date())])
return (container, account, sas)

def datalake_path(expiry_days: int=3, # Number of days until the SAS token expires
permissions: str="racwdlt" # Permissions to grant on the SAS token
):
container, account, sas = datalake_path_safe(expiry_days, permissions)
return UPath(f"az://{container}", account_name=account, sas_token=sas)
1 change: 0 additions & 1 deletion nbdev_squ/legacy.py
@@ -14,7 +14,6 @@
logger = logging.getLogger(__name__)

# %% ../nbs/02_legacy.ipynb 6
@memoize_stampede(api.cache, expire=60 * 5) # cache for 5 mins
def adx_query(kql):
"""
Run a kusto query
10 changes: 7 additions & 3 deletions nbs/00_core.ipynb
@@ -235,16 +235,20 @@
"source": [
"#| exports\n",
"@memoize_stampede(cache, expire=60 * 60 * 24)\n",
"def datalake_path(expiry_days: int=3, # Number of days until the SAS token expires\n",
" permissions: str=\"racwdlt\" # Permissions to grant on the SAS token\n",
" ):\n",
"def datalake_path_safe(expiry_days, permissions):\n",
" if not cache.get(\"logged_in\"): # Have to login to grab keyvault config\n",
" login()\n",
" expiry = pandas.Timestamp(\"now\") + pandas.Timedelta(days=expiry_days)\n",
" account = cache[\"config\"][\"datalake_account\"].split(\".\")[0] # Grab the account name, not the full FQDN\n",
" container = cache['config']['datalake_container']\n",
" sas = azcli([\"storage\", \"container\", \"generate-sas\", \"--auth-mode\", \"login\", \"--as-user\",\n",
" \"--account-name\", account, \"--name\", container, \"--permissions\", permissions, \"--expiry\", str(expiry.date())])\n",
" return (container, account, sas)\n",
"\n",
"def datalake_path(expiry_days: int=3, # Number of days until the SAS token expires\n",
" permissions: str=\"racwdlt\" # Permissions to grant on the SAS token\n",
" ):\n",
" container, account, sas = datalake_path_safe(expiry_days, permissions)\n",
" return UPath(f\"az://{container}\", account_name=account, sas_token=sas)"
]
},
20 changes: 14 additions & 6 deletions nbs/01_api.ipynb
@@ -183,13 +183,20 @@
"source": [
"#| exports\n",
"@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours\n",
"def list_workspaces(fmt: str = \"df\", # df, csv, json, list\n",
"def list_workspaces_safe(fmt: str = \"df\", # df, csv, json, list\n",
" agency: str = \"ALL\"): # Agency alias or ALL\n",
" path = datalake_path()\n",
" df = pandas.read_csv((path / \"notebooks/lists/SentinelWorkspaces.csv\").open())\n",
" df = df.join(pandas.read_csv((path / \"notebooks/lists/SecOps Groups.csv\").open()).set_index(\"Alias\"), on=\"SecOps Group\", rsuffix=\"_secops\")\n",
" df = df.rename(columns={\"SecOps Group\": \"alias\", \"Domains and IPs\": \"domains\"})\n",
" df = df.dropna(subset=[\"customerId\"]).sort_values(by=\"alias\").convert_dtypes().reset_index()\n",
" persisted = f\"{dirs.user_cache_dir}/list_workspaces.parquet\"\n",
" df.to_parquet(persisted)\n",
" return persisted\n",
"\n",
"def list_workspaces(fmt: str = \"df\", # df, csv, json, list\n",
" agency: str = \"ALL\"): # Agency alias or ALL\n",
" df = pandas.read_parquet(list_workspaces_safe(fmt, agency))\n",
" if agency != \"ALL\":\n",
" df = df[df[\"alias\"] == agency]\n",
" if fmt == \"df\":\n",
@@ -236,13 +243,12 @@
"outputs": [],
"source": [
"#| exports\n",
"@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours\n",
"def list_subscriptions():\n",
" return pandas.DataFrame(azcli([\"account\", \"list\"]))[\"id\"].unique()\n",
"\n",
"@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours\n",
"def list_securityinsights():\n",
" return pandas.DataFrame(azcli([\n",
"def list_securityinsights_safe():\n",
" return azcli([\n",
" \"graph\", \"query\", \"--first\", \"1000\", \"-q\", \n",
" \"\"\"\n",
" resources\n",
@@ -254,14 +260,16 @@
" on wlid\n",
" | extend customerId = properties.customerId\n",
" \"\"\"\n",
" ])[\"data\"])\n",
" ])[\"data\"]\n",
"\n",
"def list_securityinsights():\n",
" return pandas.DataFrame(list_securityinsights_safe())\n",
"\n",
"def chunks(items, size):\n",
" # Yield successive `size` chunks from `items`\n",
" for i in range(0, len(items), size):\n",
" yield items[i:i + size]\n",
"\n",
"@memoize_stampede(cache, expire=60 * 5) # cache for 5 mins\n",
"def loganalytics_query(queries: list[str], timespan=pandas.Timedelta(\"14d\"), batch_size=190, batch_delay=32, sentinel_workspaces = None):\n",
" \"\"\"\n",
" Run queries across all workspaces, in batches of `batch_size` with a minimum delay of `batch_delay` between batches.\n",
9 changes: 0 additions & 9 deletions nbs/02_legacy.ipynb
@@ -75,7 +75,6 @@
"outputs": [],
"source": [
"#| exports\n",
"@memoize_stampede(api.cache, expire=60 * 5) # cache for 5 mins\n",
"def adx_query(kql):\n",
" \"\"\"\n",
" Run a kusto query\n",
@@ -438,14 +437,6 @@
"# convert to a jira friendly format\n",
"sentinel_beautify_local(incident.to_dict())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef560e4c-4b15-4d91-b09b-31cb1e12f8bd",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
4 changes: 2 additions & 2 deletions settings.ini
@@ -1,7 +1,7 @@
[DEFAULT]
repo = nbdev-squ
lib_name = nbdev-squ
version = 1.3.4
version = 1.3.5
min_python = 3.10
license = apache2
black_formatting = False
@@ -27,7 +27,7 @@ keywords = nbdev jupyter notebook python
language = English
status = 3
user = adonm
requirements = tenacity platformdirs universal-pathlib adlfs diskcache azure-cli>=2.58 azure-monitor-query azure-kusto-data atlassian-python-api abuseipdb_wrapper>=0.2 pysigma pandas pyarrow dask python-benedict markdown pytenable httpx_cache msticpy[azsentinel]
requirements = tenacity platformdirs universal-pathlib adlfs diskcache azure-cli>=2.59 azure-monitor-query azure-kusto-data atlassian-python-api abuseipdb_wrapper>=0.2 pysigma pandas pyarrow dask python-benedict markdown pytenable httpx_cache msticpy[azsentinel]
readme_nb = index.ipynb
allowed_metadata_keys =
allowed_cell_metadata_keys =
