From e883d76241d2228cf3570d2f4737803e67b3d28f Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 1 Mar 2021 16:41:51 +0200 Subject: [PATCH] FEAT: update db by default, add --overwrite option ... as suggested in #20: - drop PROMNESIA_INDEX_POLICY env-var. - CLI options described in the 2nd case explained in #211, due to simplicity. - Function defaults are false, as suggested in [#20](https://github.com/karlicoss/promnesia/issues/20#issuecomment-778382929). - Both index & demo updated. - Env-var now checks its value one of (update|overwrite). - All update/overwrite decision logic moved to __main_. --- doc/GUIDE.org | 4 +++- src/promnesia/__main__.py | 49 +++++++++++++++++++++++++++++++-------- src/promnesia/dump.py | 18 ++++---------- tests/integration_test.py | 10 +------- 4 files changed, 47 insertions(+), 34 deletions(-) diff --git a/doc/GUIDE.org b/doc/GUIDE.org index c672308a..e2107706 100644 --- a/doc/GUIDE.org +++ b/doc/GUIDE.org @@ -110,7 +110,9 @@ Also see [[https://github.com/karlicoss/promnesia/issues/172][issues/172]]. ** partial update -(experimental) Set env variable =PROMNESIA_INDEX_POLICY=update=. +Only index sources given in =promnesia index --sources SOURCE [SOURCE] ...= +(or all sources, if no =--sources= given), unless =--overwrite= is given, +in which case all existing visits are removed from db prior to indexing. ** exclude files from =auto= indexer diff --git a/src/promnesia/__main__.py b/src/promnesia/__main__.py index f6f025d2..5e3b3bf5 100644 --- a/src/promnesia/__main__.py +++ b/src/promnesia/__main__.py @@ -1,5 +1,4 @@ import argparse -import os import logging import inspect import sys @@ -63,7 +62,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset)) -def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=()) -> Iterable[Exception]: +def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db=False) -> Iterable[Exception]: # also keep & return errors for further display errors: List[Exception] = [] def it() -> Iterable[Res[DbVisit]]: @@ -78,7 +77,7 @@ def it() -> Iterable[Res[DbVisit]]: for v in res: print(v) else: - dump_errors = visits_to_sqlite(it()) + dump_errors = visits_to_sqlite(it(), overwrite_db=overwrite_db) for e in dump_errors: logger.exception(e) errors.append(e) @@ -89,10 +88,11 @@ def do_index( config_file: Path, dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), + overwrite_db: bool=False, ) -> None: config.load_from(config_file) # meh.. should be cleaner try: - errors = list(_do_index(dry=dry, sources_subset=sources_subset)) + errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db)) finally: config.reset() if len(errors) > 0: @@ -133,6 +133,7 @@ def do_demo( config_file: Optional[Path], name: str='demo', sources_subset: Iterable[Union[str, int]]=(), + overwrite_db: bool=False, ) -> None: from pprint import pprint with TemporaryDirectory() as tdir: @@ -149,7 +150,7 @@ def do_demo( ) config.instance = cfg - errors = list(_do_index(sources_subset=sources_subset)) + errors = list(_do_index(sources_subset=sources_subset, overwrite_db=overwrite_db)) if len(errors) > 0: logger.error('%d errors during indexing (see logs above for backtraces)', len(errors)) for e in errors: @@ -300,8 +301,14 @@ def main() -> None: nargs="+", type=_ordinal_or_name, metavar="SOURCE", - help="Source names (or their 0-indexed position) to index." - " If missing, db is recreated empty and all sources are indexed.", + help="Source names (or their 0-indexed position) to index.", + ) + ep.add_argument( + '--overwrite', + required=False, + action="store_true", + help="Empty db before populating it with newly indexed visits." + " If interrupted, db is left untouched." ) sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F) # type: ignore @@ -323,8 +330,22 @@ def main() -> None: default='guess', help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)', ) - ap.add_argument('--sources', required=False, action="extend", nargs="+", type=_ordinal_or_name, - help="Subset of source(s) to run (name or 0-indexed position); use `promnisia --dry` to view sources") + ap.add_argument( + '--sources', + required=False, + action="extend", + nargs="+", + type=_ordinal_or_name, + metavar="SOURCE", + help="Source names (or their 0-indexed position) to index.", + ) + ap.add_argument( + '--overwrite', + required=False, + action="store_true", + help="Empty db before populating it with newly indexed visits." + " If interrupted, db is left untouched." + ) ap.add_argument('params', nargs='*', help='Optional extra params for the indexer') isp = subp.add_parser('install-server', help='Install server as a systemd service (for autostart)', formatter_class=F) @@ -358,13 +379,20 @@ def main() -> None: p.print_help(sys.stderr) sys.exit(1) + logger.info("CLI args: %s", args) + # TODO maybe, it's better for server to compute intermediate represetnation? # the only downside is storage. dunno. # worst case -- could use database? with get_tmpdir() as tdir: # TODO?? if args.mode == 'index': - do_index(config_file=args.config, dry=args.dry, sources_subset=args.sources) + do_index( + config_file=args.config, + dry=args.dry, + sources_subset=args.sources, + overwrite_db=args.overwrite, + ) elif args.mode == 'serve': server.run(args) elif args.mode == 'demo': @@ -377,6 +405,7 @@ def main() -> None: config_file=args.config, name=args.name, sources_subset=args.sources, + overwrite_db=args.overwrite, ) elif args.mode == 'install-server': # todo rename to 'autostart' or something? install_server.install(args) diff --git a/src/promnesia/dump.py b/src/promnesia/dump.py index 49198909..f42e325e 100644 --- a/src/promnesia/dump.py +++ b/src/promnesia/dump.py @@ -1,4 +1,3 @@ -import os from pathlib import Path import shutil from typing import Dict, List, Tuple, Set, Iterable @@ -14,14 +13,6 @@ from . import config -def update_policy_active() -> bool: - # NOTE: experimental.. need to make it a proper cmdline argument later - INDEX_POLICY = os.environ.get('PROMNESIA_INDEX_POLICY', 'overwrite_all') - # if 'update' is passed, will run against the existing db and only tough the sources present in the current index run - # not sue if a good name for this.. - return INDEX_POLICY == 'update' - - # NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead # since as far as sql is concerned it should all be in the same transaction. only a guess # not sure it's the proper way to handle it @@ -30,7 +21,7 @@ def update_policy_active() -> bool: # returns critical warnings -def visits_to_sqlite(vit: Iterable[Res[DbVisit]]) -> List[Exception]: +def visits_to_sqlite(vit: Iterable[Res[DbVisit]], *, overwrite_db: bool) -> List[Exception]: logger = get_logger() db_path = config.get().db @@ -58,8 +49,7 @@ def vit_ok() -> Iterable[DbVisit]: yield ev tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite' - policy_update = update_policy_active() - if not policy_update: + if overwrite_db: engine = create_engine(f'sqlite:///{tpath}') else: engine = create_engine(f'sqlite:///{db_path}') @@ -82,12 +72,12 @@ def vit_ok() -> Iterable[DbVisit]: # pylint: disable=no-value-for-parameter conn.execute(table.insert().values(bound)) - if not policy_update: + if overwrite_db: shutil.move(str(tpath), str(db_path)) errs = '' if errors == 0 else f', {errors} ERRORS' total = ok + errors - what = 'updated' if policy_update else 'overwritten' + what = 'overwritten' if overwrite_db else 'updated' logger.info('%s database "%s". %d total (%d OK%s)', what, db_path, total, ok, errs) res: List[Exception] = [] if total == 0: diff --git a/tests/integration_test.py b/tests/integration_test.py index f9ffdb1d..6f7e2be0 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -13,15 +13,7 @@ def run_index(cfg: Path, *, update=False) -> None: from promnesia.__main__ import do_index - if update: - ev = 'PROMNESIA_INDEX_POLICY' - os.environ[ev] = 'update' - try: - do_index(cfg) - finally: - del os.environ[ev] - else: - do_index(cfg) + do_index(cfg, overwrite_db=not update) index = run_index # legacy name