Skip to content

Commit

Permalink
FEAT: --update/overwrite option TODOed by karlicoss#20,
Browse files Browse the repository at this point in the history
- CLI options described in the 2nd case explained in karlicoss#211,
  due to simplicity.
- The precedence for deciding update/overwrite:
  env-var, --update/overwrite, --source exist?
- Function defaults are false, as suggested in
  [karlicoss#20](karlicoss#20 (comment)).
- Both index & demo updated.
- Env-var now checks its value one of (update|overwrite).
- All update/overwrite decision logic moved to __main_.
  • Loading branch information
ankostis committed Mar 3, 2021
1 parent 92ecac7 commit 0775abb
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 19 deletions.
49 changes: 44 additions & 5 deletions src/promnesia/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import logging
import inspect
import os
import sys
from typing import List, Tuple, Optional, Dict, Sequence, Iterable, Iterator, Union
from pathlib import Path
Expand Down Expand Up @@ -63,7 +64,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset))


def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=()) -> Iterable[Exception]:
def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db=False) -> Iterable[Exception]:
# also keep & return errors for further display
errors: List[Exception] = []
def it() -> Iterable[Res[DbVisit]]:
Expand All @@ -78,7 +79,7 @@ def it() -> Iterable[Res[DbVisit]]:
for v in res:
print(v)
else:
dump_errors = visits_to_sqlite(it())
dump_errors = visits_to_sqlite(it(), overwrite_db=overwrite_db)
for e in dump_errors:
logger.exception(e)
errors.append(e)
Expand All @@ -90,10 +91,11 @@ def do_index(
dry: bool=False,
sources_subset: Iterable[Union[str, int]]=(),
overwrite: bool=None,
overwrite_db=False,
) -> None:
config.load_from(config_file) # meh.. should be cleaner
try:
errors = list(_do_index(dry=dry, sources_subset=sources_subset))
errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db))
finally:
config.reset()
if len(errors) > 0:
Expand Down Expand Up @@ -133,6 +135,7 @@ def do_demo(*
config_file: Optional[Path],
name: str='demo',
sources_subset: Iterable[Union[str, int]]=(),
overwrite_db: bool=False,
) -> None:
from pprint import pprint
with TemporaryDirectory() as tdir:
Expand All @@ -149,7 +152,7 @@ def do_demo(*
)
config.instance = cfg

errors = list(_do_index(sources_subset=sources_subset))
errors = list(_do_index(sources_subset=sources_subset, overwrite_db=overwrite_db))
if len(errors) > 0:
logger.error('%d errors during indexing (see logs above for backtraces)', len(errors))
for e in errors:
Expand Down Expand Up @@ -303,6 +306,21 @@ def main() -> None:
help="Source names (or their 0-indexed position) to index."
" If missing, db is recreated empty and all sources are indexed.",
)
overwrite = ep.add_mutually_exclusive_group()
overwrite.add_argument(
'--update',
required=False,
action="store_const",
const=True,
dest="overwrite_db",
help="Keep existing visits in db and merge new ones collected."
" If neither is given, --update assumed when --sources given (the default)"
", unless PROMNESIA_INDEX_POLICY=(update|overwrite) env-var defined"
", which takes precendance."
" Conflicts with --update.%(default)0.0s"
)
overwrite.add_argument('--overwrite', required=False, action="store_const", const=False, dest="overwrite_db",
help="The opposite of --update: recreate db with newly indexed visits%(default)0.0s")

sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F) # type: ignore
server.setup_parser(sp)
Expand Down Expand Up @@ -358,13 +376,33 @@ def main() -> None:
p.print_help(sys.stderr)
sys.exit(1)

overwrite_policy_var = os.environ.get("PROMNESIA_INDEX_POLICY")
if overwrite_policy_var:
overwrite_policy_var = overwrite_policy_var.lower()
if overwrite_policy_var not in ("update", "overwrite"):
print(
f"Invalid value for PROMNESIA_INDEX_POLICY env-var: {overwrite_policy_var}"
"\n Must be one of (update | overwrite).",
file=sys.stderr)
sys.exit(2)
args.overwrite_db = overwrite_policy_var == "overwrite"
if args.overwrite_db is None:
args.overwrite_db = not bool(args.sources)

logger.info("CLI args: %s", args)

# TODO maybe, it's better for server to compute intermediate represetnation?
# the only downside is storage. dunno.
# worst case -- could use database?

with get_tmpdir() as tdir: # TODO??
if args.mode == 'index':
do_index(config_file=args.config, dry=args.dry, sources_subset=args.sources)
do_index(
config_file=args.config,
dry=args.dry,
sources_subset=args.sources,
overwrite_db=args.overwrite_db,
)
elif args.mode == 'serve':
server.run(args)
elif args.mode == 'demo':
Expand All @@ -377,6 +415,7 @@ def main() -> None:
config_file=args.config,
name=args.name,
sources_subset=args.sources,
overwrite_db=args.overwrite_db,
)
elif args.mode == 'install-server': # todo rename to 'autostart' or something?
install_server.install(args)
Expand Down
18 changes: 4 additions & 14 deletions src/promnesia/dump.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from pathlib import Path
import shutil
from typing import Dict, List, Tuple, Set, Iterable
Expand All @@ -14,14 +13,6 @@
from . import config


def update_policy_active() -> bool:
# NOTE: experimental.. need to make it a proper cmdline argument later
INDEX_POLICY = os.environ.get('PROMNESIA_INDEX_POLICY', 'overwrite_all')
# if 'update' is passed, will run against the existing db and only tough the sources present in the current index run
# not sue if a good name for this..
return INDEX_POLICY == 'update'


# NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
# since as far as sql is concerned it should all be in the same transaction. only a guess
# not sure it's the proper way to handle it
Expand All @@ -30,7 +21,7 @@ def update_policy_active() -> bool:


# returns critical warnings
def visits_to_sqlite(vit: Iterable[Res[DbVisit]]) -> List[Exception]:
def visits_to_sqlite(vit: Iterable[Res[DbVisit]], *, overwrite_db: bool) -> List[Exception]:
logger = get_logger()
db_path = config.get().db

Expand Down Expand Up @@ -58,8 +49,7 @@ def vit_ok() -> Iterable[DbVisit]:
yield ev

tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
policy_update = update_policy_active()
if not policy_update:
if overwrite_db:
engine = create_engine(f'sqlite:///{tpath}')
else:
engine = create_engine(f'sqlite:///{db_path}')
Expand All @@ -82,12 +72,12 @@ def vit_ok() -> Iterable[DbVisit]:
# pylint: disable=no-value-for-parameter
conn.execute(table.insert().values(bound))

if not policy_update:
if overwrite_db:
shutil.move(str(tpath), str(db_path))

errs = '' if errors == 0 else f', {errors} ERRORS'
total = ok + errors
what = 'updated' if policy_update else 'overwritten'
what = 'overwritten' if overwrite_db else 'updated'
logger.info('%s database "%s". %d total (%d OK%s)', what, db_path, total, ok, errs)
res: List[Exception] = []
if total == 0:
Expand Down

0 comments on commit 0775abb

Please sign in to comment.