From f2b82d32e84d8ac8c48842dd0c3f5608fc0447f0 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 1 Mar 2021 16:41:51 +0200 Subject: [PATCH] FEAT: --update/overwrite option TODOed by #20, - CLI options described in the 2nd case explained in #211, due to simplicity. - The precedence for deciding update/overwrite: env-var, --update/overwrite, --source exist? - Function defaults are false, as suggested in [#20](https://github.com/karlicoss/promnesia/issues/20#issuecomment-778382929). - Both index & demo updated. - Env-var now checks its value one of (update|overwrite). - All update/overwrite decision logic moved to __main_. --- src/promnesia/__main__.py | 57 ++++++++++++++++++++++++++++++++++----- src/promnesia/dump.py | 18 +++---------- 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/src/promnesia/__main__.py b/src/promnesia/__main__.py index 625c29b5..1e68ae4b 100644 --- a/src/promnesia/__main__.py +++ b/src/promnesia/__main__.py @@ -2,6 +2,7 @@ import os import logging import inspect +import os import sys from typing import List, Tuple, Optional, Dict, Sequence, Iterable, Iterator from pathlib import Path @@ -66,7 +67,7 @@ def iter_all_visits(sources_subset: Iterable[str]=()) -> Iterator[Res[DbVisit]]: yield e -def _do_index(dry: bool=False, sources_subset: Iterable[str]=()) -> Iterable[Exception]: +def _do_index(dry: bool=False, sources_subset: Iterable[str]=(), overwrite_db=False) -> Iterable[Exception]: # also keep & return errors for further display errors: List[Exception] = [] def it() -> Iterable[Res[DbVisit]]: @@ -81,17 +82,23 @@ def it() -> Iterable[Res[DbVisit]]: for v in res: print(v) else: - dump_errors = visits_to_sqlite(it()) + dump_errors = visits_to_sqlite(it(), overwrite_db) for e in dump_errors: logger.exception(e) errors.append(e) return errors -def do_index(config_file: Path, dry: bool=False, sources_subset: Iterable[str]=()) -> None: +def do_index( + config_file: Path, + dry: bool=False, + sources_subset: Iterable[str]=(), + overwrite: bool=None, + overwrite_db=False, + ) -> None: config.load_from(config_file) # meh.. should be cleaner try: - errors = list(_do_index(dry=dry, sources_subset=sources_subset)) + errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db)) finally: config.reset() if len(errors) > 0: @@ -131,6 +138,7 @@ def do_demo(* config_file: Optional[Path], name: str='demo', sources_subset: Iterable[str]=(), + overwrite_db: bool=False, ) -> None: from pprint import pprint with TemporaryDirectory() as tdir: @@ -147,7 +155,7 @@ def do_demo(* ) config.instance = cfg - errors = list(_do_index(sources_subset=sources_subset)) + errors = list(_do_index(sources_subset=sources_subset, overwrite_db=overwrite_db)) if len(errors) > 0: logger.error('%d errors during indexing (see logs above for backtraces)', len(errors)) for e in errors: @@ -293,6 +301,22 @@ def main() -> None: ep.add_argument('--intermediate', required=False, help="Used for development, you don't need it") ep.add_argument('--sources', required=False, action="extend", nargs="+", type=_parse_ordinal_or_name, help="Subset of source(s) to run (name or 0-indexed position); use `promnesia --dry` to view sources") + overwrite = ep.add_mutually_exclusive_group() + overwrite.add_argument( + '--update', + required=False, + action="store_const", + const=True, + dest="overwrite_db", + help= + "Keep existing visits in db and merge new ones collected." + " If neither is given, --update assumed when --sources given (the default)" + ", unless PROMNESIA_INDEX_POLICY=(update|overwrite) env-var defined" + ", which takes precendance." + " Conflicts with --update.%(default)0.0s" + ) + overwrite.add_argument('--overwrite', required=False, action="store_const", const=False, dest="overwrite_db", + help="The opposite of --update: recreate db with newly indexed visits%(default)0.0s") sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F) # type: ignore server.setup_parser(sp) @@ -348,13 +372,33 @@ def main() -> None: p.print_help(sys.stderr) sys.exit(1) + overwrite_policy_var = os.environ.get("PROMNESIA_INDEX_POLICY") + if overwrite_policy_var: + overwrite_policy_var = overwrite_policy_var.lower() + if overwrite_policy_var not in ("update", "overwrite"): + print( + f"Invalid value for PROMNESIA_INDEX_POLICY env-var: {overwrite_policy_var}" + "\n Must be one of (update | overwrite).", + file=sys.stderr) + sys.exit(2) + args.overwrite_db = overwrite_policy_var == "overwrite" + if args.overwrite_db is None: + args.overwrite_db = not bool(args.sources) + + logger.info("CLI args: %s", args) + # TODO maybe, it's better for server to compute intermediate represetnation? # the only downside is storage. dunno. # worst case -- could use database? with get_tmpdir() as tdir: # TODO?? if args.mode == 'index': - do_index(config_file=args.config, dry=args.dry, sources_subset=args.sources) + do_index( + config_file=args.config, + dry=args.dry, + sources_subset=args.sources, + overwrite_db=args.overwrite_db, + ) elif args.mode == 'serve': server.run(args) elif args.mode == 'demo': @@ -367,6 +411,7 @@ def main() -> None: config_file=args.config, name=args.name, sources_subset=args.sources, + overwrite_db=args.overwrite_db, ) elif args.mode == 'install-server': # todo rename to 'autostart' or something? install_server.install(args) diff --git a/src/promnesia/dump.py b/src/promnesia/dump.py index 49198909..f42e325e 100644 --- a/src/promnesia/dump.py +++ b/src/promnesia/dump.py @@ -1,4 +1,3 @@ -import os from pathlib import Path import shutil from typing import Dict, List, Tuple, Set, Iterable @@ -14,14 +13,6 @@ from . import config -def update_policy_active() -> bool: - # NOTE: experimental.. need to make it a proper cmdline argument later - INDEX_POLICY = os.environ.get('PROMNESIA_INDEX_POLICY', 'overwrite_all') - # if 'update' is passed, will run against the existing db and only tough the sources present in the current index run - # not sue if a good name for this.. - return INDEX_POLICY == 'update' - - # NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead # since as far as sql is concerned it should all be in the same transaction. only a guess # not sure it's the proper way to handle it @@ -30,7 +21,7 @@ def update_policy_active() -> bool: # returns critical warnings -def visits_to_sqlite(vit: Iterable[Res[DbVisit]]) -> List[Exception]: +def visits_to_sqlite(vit: Iterable[Res[DbVisit]], *, overwrite_db: bool) -> List[Exception]: logger = get_logger() db_path = config.get().db @@ -58,8 +49,7 @@ def vit_ok() -> Iterable[DbVisit]: yield ev tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite' - policy_update = update_policy_active() - if not policy_update: + if overwrite_db: engine = create_engine(f'sqlite:///{tpath}') else: engine = create_engine(f'sqlite:///{db_path}') @@ -82,12 +72,12 @@ def vit_ok() -> Iterable[DbVisit]: # pylint: disable=no-value-for-parameter conn.execute(table.insert().values(bound)) - if not policy_update: + if overwrite_db: shutil.move(str(tpath), str(db_path)) errs = '' if errors == 0 else f', {errors} ERRORS' total = ok + errors - what = 'updated' if policy_update else 'overwritten' + what = 'overwritten' if overwrite_db else 'updated' logger.info('%s database "%s". %d total (%d OK%s)', what, db_path, total, ok, errs) res: List[Exception] = [] if total == 0: