Skip to content

Commit

Permalink
core: disabling cachew through CACHEW_DISABLE env (#56)
Browse files Browse the repository at this point in the history
* core: disabling cachew through CACHEW_DISABLE env
  • Loading branch information
seanbreckenridge committed Sep 22, 2023
1 parent b17f5e2 commit f71a505
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 0 deletions.
39 changes: 39 additions & 0 deletions doc/cachew_disable.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
This can be moved into the README.md once it's been tested a bit.

### Disable through Environment Variables

To disable a `cachew` function in some module, you can use the `CACHEW_DISABLE` environment variable. This is a colon-delimited (like `$PATH`) list of modules to disable. Each entry disables the named module and, recursively, all of its submodules; entries may also use [unix-style globs](https://docs.python.org/3/library/fnmatch.html).

For example, say you were using [HPI](https://github.com/karlicoss/HPI) which internally uses a snippet like `mcachew` above. You may want to enable `cachew` for _most_ modules, but disable it for specific ones. For example, take:

```
my/browser
├── active_browser.py
├── all.py
├── common.py
└── export.py
my/reddit
├── __init__.py
├── all.py
├── common.py
├── pushshift.py
└── rexport.py
```

To disable `cachew` in all of these files: `export CACHEW_DISABLE=my.browser:my.reddit` (disables for all submodules)

To disable just for a particular module: `export CACHEW_DISABLE='my.browser.export'`

Similarly to `$PATH` manipulations, you can do this in your shell configuration incrementally:

```
CACHEW_DISABLE='my.reddit.rexport'
if some condition...; then
CACHEW_DISABLE="my.browser.export:$CACHEW_DISABLE"
fi
export CACHEW_DISABLE
```

You can also use globs, e.g. `CACHEW_DISABLE='my.*.gdpr'`

To disable `cachew` everywhere, you could set `export CACHEW_DISABLE='*'`
78 changes: 78 additions & 0 deletions src/cachew/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import logging
from pathlib import Path
import os
import stat
import sys
from typing import (
Expand Down Expand Up @@ -486,6 +487,78 @@ def callable_name(func: Callable) -> str:
mod = getattr(func, '__module__', None) or ''
return f'{mod}:{func.__qualname__}'

def callable_module_name(func: Callable) -> Optional[str]:
    """Return ``func.__module__``, or ``None`` when ``func`` has no such attribute."""
    try:
        return func.__module__
    except AttributeError:
        return None

# Deliberately not cached: re-reading the environment on every call lets the user change it on the fly.
def _parse_disabled_modules(logger: Optional[logging.Logger] = None) -> List[str]:
    """
    Parse the ``CACHEW_DISABLE`` environment variable into a list of module patterns.

    The variable is a ``$PATH``-like, colon-separated list, e.g. ``CACHEW_DISABLE=my.browser:my.reddit``.
    Whitespace around each entry is stripped and empty entries are dropped, so values such as
    ``my.module:$CACHEW_DISABLE`` (with the variable previously unset) or ``my.browser : my.reddit``
    parse cleanly.

    :param logger: optional logger; if given, a warning is emitted when the value contains a comma,
        since that usually means a comma-separated list was supplied by mistake.
    :return: the patterns in the order they appear; an empty list when the variable is unset or blank.
    """
    disabled = os.environ.get('CACHEW_DISABLE', '')
    if disabled.strip() == '':
        return []
    if ',' in disabled and logger is not None:
        logger.warning('CACHEW_DISABLE contains a comma, but this expects a $PATH-like, colon-separated list; '
                       f'try something like CACHEW_DISABLE={disabled.replace(",", ":")}')
    # strip each entry *before* filtering: an entry kept with its surrounding whitespace could never match
    stripped = (part.strip() for part in disabled.split(':'))
    return [part for part in stripped if part]


def _matches_disabled_module(module_name: str, pattern: str) -> bool:
    '''
    Check whether ``module_name`` is covered by a single ``CACHEW_DISABLE`` entry.

    Both names are split on ``.`` and compared component by component with unix-style
    globs. A pattern also covers every submodule of what it matches, but never a parent
    of it. Matching is case-sensitive on every platform, because module names are.

    >>> _matches_disabled_module('my.browser', 'my.browser')
    True
    >>> _matches_disabled_module('my.browser', 'my.*')
    True
    >>> _matches_disabled_module('my.browser', 'my')
    True
    >>> _matches_disabled_module('my.browser', 'my.browse*')
    True
    >>> _matches_disabled_module('my.browser.export', 'my.browser')
    True
    >>> _matches_disabled_module('mysomething.else', '*')  # CACHEW_DISABLE='*' disables everything
    True
    >>> _matches_disabled_module('my.browser', 'my.br?????')  # fnmatch supports unix-like patterns
    True
    >>> _matches_disabled_module('my.browser', 'my.browse')
    False
    >>> _matches_disabled_module('mysomething.else', 'my')  # since not at '.' boundary, doesn't match
    False
    >>> _matches_disabled_module('mysomething.else', '')
    False
    >>> _matches_disabled_module('my.browser', 'my.browser.export')
    False
    >>> _matches_disabled_module('my.Browser', 'my.browser')  # case-sensitive
    False
    '''
    import fnmatch

    if module_name == pattern:
        return True

    module_parts = module_name.split('.')
    pattern_parts = pattern.split('.')

    # e.g. if pattern is 'module.submod.inner_module' and module is just 'module.submod',
    # there's no possible way for it to match
    if len(module_parts) < len(pattern_parts):
        return False

    # zip() stops at the end of the (shorter or equal) pattern, so trailing module
    # components are ignored -- that is what makes a pattern apply to submodules.
    # fnmatchcase() rather than fnmatch(): the latter normalises case via os.path.normcase,
    # which would make matching platform-dependent.
    return all(fnmatch.fnmatchcase(mp, pp) for mp, pp in zip(module_parts, pattern_parts))

def _module_is_disabled(module_name: str, logger: logging.Logger) -> bool:
    """
    Tell whether caching is disabled for ``module_name`` through ``CACHEW_DISABLE``.

    :param module_name: dotted module name to check, e.g. ``my.browser.export``.
    :param logger: receives the comma warning from parsing and a debug message naming the matching pattern.
    :return: ``True`` if any pattern from ``CACHEW_DISABLE`` matches, ``False`` otherwise.
    """
    for pat in _parse_disabled_modules(logger):
        if _matches_disabled_module(module_name, pat):
            # lazy %-args: the message is only built when debug logging is actually enabled
            logger.debug(
                "caching disabled for %s (matched '%s' from 'CACHEW_DISABLE=%s')",
                module_name,
                pat,
                os.environ.get('CACHEW_DISABLE', ''),
            )
            return True
    return False

# fmt: off
_CACHEW_CACHED = 'cachew_cached' # TODO add to docs
Expand Down Expand Up @@ -567,6 +640,11 @@ def cachew_wrapper(
yield from func(*args, **kwargs)
return

mod_name = callable_module_name(func)
if mod_name is not None and _module_is_disabled(mod_name, logger):
yield from func(*args, **kwargs)
return

def get_db_path() -> Optional[Path]:
db_path: Path
if callable(cache_path):
Expand Down

0 comments on commit f71a505

Please sign in to comment.