Skip to content

Commit

Permalink
FEAT: --update/overwrite option TODOed by karlicoss#20,
Browse files Browse the repository at this point in the history
- CLI options described in the 2nd case explained in karlicoss#211,
  due to simplicity.
- The precedence for deciding update/overwrite:
  env-var, --update/overwrite, --source exist?
- Function defaults are false, as suggested in
  [karlicoss#20](karlicoss#20 (comment)).
- Both index & demo updated.
- Env-var now checks its value one of (update|overwrite).
- All update/overwrite decision logic moved to __main_.
  • Loading branch information
ankostis committed Mar 1, 2021
1 parent 4fb9cbd commit 581f1d5
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 20 deletions.
57 changes: 51 additions & 6 deletions src/promnesia/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import logging
import inspect
import os
import sys
from typing import List, Tuple, Optional, Dict, Sequence, Iterable, Iterator
from pathlib import Path
Expand Down Expand Up @@ -66,7 +67,7 @@ def iter_all_visits(sources_subset: Iterable[str]=()) -> Iterator[Res[DbVisit]]:
yield e


def _do_index(dry: bool=False, sources_subset: Iterable[str]=()) -> Iterable[Exception]:
def _do_index(dry: bool=False, sources_subset: Iterable[str]=(), overwrite_db=False) -> Iterable[Exception]:
# also keep & return errors for further display
errors: List[Exception] = []
def it() -> Iterable[Res[DbVisit]]:
Expand All @@ -81,17 +82,23 @@ def it() -> Iterable[Res[DbVisit]]:
for v in res:
print(v)
else:
dump_errors = visits_to_sqlite(it())
dump_errors = visits_to_sqlite(it(), overwrite_db)
for e in dump_errors:
logger.exception(e)
errors.append(e)
return errors


def do_index(config_file: Path, dry: bool=False, sources_subset: Iterable[str]=()) -> None:
def do_index(
config_file: Path,
dry: bool=False,
sources_subset: Iterable[str]=(),
overwrite: bool=None,
overwrite_db=False,
) -> None:
config.load_from(config_file) # meh.. should be cleaner
try:
errors = list(_do_index(dry=dry, sources_subset=sources_subset))
errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db))
finally:
config.reset()
if len(errors) > 0:
Expand Down Expand Up @@ -131,6 +138,7 @@ def do_demo(*
config_file: Optional[Path],
name: str='demo',
sources_subset: Iterable[str]=(),
overwrite_db: bool=False,
) -> None:
from pprint import pprint
with TemporaryDirectory() as tdir:
Expand All @@ -147,7 +155,7 @@ def do_demo(*
)
config.instance = cfg

errors = list(_do_index(sources_subset=sources_subset))
errors = list(_do_index(sources_subset=sources_subset, overwrite_db=overwrite_db))
if len(errors) > 0:
logger.error('%d errors during indexing (see logs above for backtraces)', len(errors))
for e in errors:
Expand Down Expand Up @@ -293,6 +301,22 @@ def main() -> None:
ep.add_argument('--intermediate', required=False, help="Used for development, you don't need it")
ep.add_argument('--sources', required=False, action="extend", nargs="+", type=_parse_ordinal_or_name,
help="Subset of source(s) to run (name or 0-indexed position); use `promnisia --dry` to view sources")
overwrite = ep.add_mutually_exclusive_group()
overwrite.add_argument(
'--update',
required=False,
action="store_const",
const=True,
dest="overwrite_db",
help=
"Keep existing visits in db and merge new ones collected."
" If neither is given, --update assumed when --sources given (the default)"
", unless PROMNESIA_INDEX_POLICY=(update|overwrite) env-var defined"
", which takes precendance."
" Conflicts with --update.%(default)0.0s"
)
overwrite.add_argument('--overwrite', required=False, action="store_const", const=False, dest="overwrite_db",
help="The opposite of --update: recreate db with newly indexed visits%(default)0.0s")

sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F) # type: ignore
server.setup_parser(sp)
Expand Down Expand Up @@ -348,13 +372,33 @@ def main() -> None:
p.print_help(sys.stderr)
sys.exit(1)

overwrite_policy_var = os.environ.get("PROMNESIA_INDEX_POLICY")
if overwrite_policy_var:
overwrite_policy_var = overwrite_policy_var.lower()
if overwrite_policy_var not in ("update", "overwrite"):
print(
f"Invalid value for PROMNESIA_INDEX_POLICY env-var: {overwrite_policy_var}"
"\n Must be one of (update | overwrite).",
file=sys.stderr)
sys.exit(2)
args.overwrite_db = overwrite_policy_var == "overwrite"
if args.overwrite_db is None:
args.overwrite_db = not bool(args.sources)

logger.info("CLI args: %s", args)

# TODO maybe, it's better for server to compute intermediate represetnation?
# the only downside is storage. dunno.
# worst case -- could use database?

with get_tmpdir() as tdir: # TODO??
if args.mode == 'index':
do_index(config_file=args.config, dry=args.dry, sources_subset=args.sources)
do_index(
config_file=args.config,
dry=args.dry,
sources_subset=args.sources,
overwrite_db=args.overwrite_db,
)
elif args.mode == 'serve':
server.run(args)
elif args.mode == 'demo':
Expand All @@ -367,6 +411,7 @@ def main() -> None:
config_file=args.config,
name=args.name,
sources_subset=args.sources,
overwrite_db=args.overwrite_db,
)
elif args.mode == 'install-server': # todo rename to 'autostart' or something?
install_server.install(args)
Expand Down
18 changes: 4 additions & 14 deletions src/promnesia/dump.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from pathlib import Path
import shutil
from typing import Dict, List, Tuple, Set, Iterable
Expand All @@ -14,14 +13,6 @@
from . import config


def update_policy_active() -> bool:
# NOTE: experimental.. need to make it a proper cmdline argument later
INDEX_POLICY = os.environ.get('PROMNESIA_INDEX_POLICY', 'overwrite_all')
# if 'update' is passed, will run against the existing db and only tough the sources present in the current index run
# not sue if a good name for this..
return INDEX_POLICY == 'update'


# NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
# since as far as sql is concerned it should all be in the same transaction. only a guess
# not sure it's the proper way to handle it
Expand All @@ -30,7 +21,7 @@ def update_policy_active() -> bool:


# returns critical warnings
def visits_to_sqlite(vit: Iterable[Res[DbVisit]]) -> List[Exception]:
def visits_to_sqlite(vit: Iterable[Res[DbVisit]], overwrite_db: bool) -> List[Exception]:
logger = get_logger()
db_path = config.get().db

Expand Down Expand Up @@ -58,8 +49,7 @@ def vit_ok() -> Iterable[DbVisit]:
yield ev

tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
policy_update = update_policy_active()
if not policy_update:
if overwrite_db:
engine = create_engine(f'sqlite:///{tpath}')
else:
engine = create_engine(f'sqlite:///{db_path}')
Expand All @@ -82,12 +72,12 @@ def vit_ok() -> Iterable[DbVisit]:
# pylint: disable=no-value-for-parameter
conn.execute(table.insert().values(bound))

if not policy_update:
if overwrite_db:
shutil.move(str(tpath), str(db_path))

errs = '' if errors == 0 else f', {errors} ERRORS'
total = ok + errors
what = 'updated' if policy_update else 'overwritten'
what = 'overwritten' if overwrite_db else 'updated'
logger.info('%s database "%s". %d total (%d OK%s)', what, db_path, total, ok, errs)
res: List[Exception] = []
if total == 0:
Expand Down

0 comments on commit 581f1d5

Please sign in to comment.