get rid of dataset library, it's not maintained anymore
karlicoss committed Aug 22, 2023
1 parent a3e0562 commit 8a5b2f3
Showing 3 changed files with 97 additions and 51 deletions.
setup.py: 2 changes (1 addition & 1 deletion)
@@ -28,7 +28,7 @@ def main() -> None:
author_email='karlicoss@gmail.com',
description='Backup and extract data from your Kobo reader',

-    install_requires=['pytz', 'dataset'],
+    install_requires=['pytz'],
extras_require={
'testing': ['pytest'],
'linting': ['pytest', 'mypy', 'lxml', 'types-pytz'],
src/kobuddy/__init__.py: 99 changes (49 additions & 50 deletions)
@@ -11,6 +11,7 @@
import json
import shutil
import struct
+import sqlite3
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
@@ -19,13 +20,11 @@
from typing import (Dict, Iterator, List, NamedTuple, Optional, Sequence, Set,
Tuple, Union, Iterable, Any)

-import dataset  # type: ignore
import pytz

from .common import get_logger, unwrap, cproperty, group_by_key, the, nullcontext, Res, sorted_res, split_res
from .kobo_device import get_kobo_mountpoint


+from .sqlite import sqlite_connection


# a bit nasty to have a global variable here... will rewrite later
@@ -379,28 +378,26 @@ def by_dict(self, d) -> Book:
assert res is not None, d
return res


class Extra(NamedTuple):
time_spent: int
percent: int
status: int
last_read: Optional[datetime]


-def _load_books(db) -> List[Tuple[Book, Extra]]:
+def _load_books(db: sqlite3.Connection) -> List[Tuple[Book, Extra]]:
logger = get_logger()
-    content_table = db.load_table('content')
items: List[Tuple[Book, Extra]] = []
-    # wtf... that fails with some sqlalchemy crap
-    # books = content_table.find(ContentType=6)
-    books = db.query('SELECT * FROM content WHERE ContentType=6')
+    books = db.execute('SELECT * FROM content WHERE ContentType=6')
for b in books:
content_id = b['ContentID']
isbn = b['ISBN']
title = b['Title'].strip() # ugh. not sure about that, but sometimes helped
author = b['Attribution']

# TODO not so sure about that; it was the case for KoboShelfes databases
-        time_spent = b.get('TimeSpentReading', 0)
+        time_spent = 0 if 'TimeSpentReading' not in b.keys() else b['TimeSpentReading']
percent = b['___PercentRead']
status = int(b['ReadStatus'])
last_read = b['DateLastRead']
@@ -503,13 +500,6 @@ class Types:
#


-def dataset_connect_ro(db: Path):
-    # support read only filesystems (also guarantees we don't change the database by accident)
-    import sqlite3
-    creator = lambda: sqlite3.connect(f'file:{db}?immutable=1', uri=True)
-    return dataset.connect('sqlite:///', engine_kwargs={'creator': creator})


# TODO use literal mypy types?
def _iter_events_aux(limit=None, errors='throw') -> Iterator[Res[Event]]:
# TODO handle all_ here?
@@ -522,10 +512,15 @@ def _iter_events_aux(limit=None, errors='throw') -> Iterator[Res[Event]]:
# TODO do it if it's defensive?
books = Books(create_if_missing=True)

-    for fname in dbs:
-        logger.info('processing %s', fname)
-        db = dataset_connect_ro(fname)

+    def connections():
+        for fname in dbs:
+            logger.info(f'processing {fname}')
+            with sqlite_connection(fname, immutable=True, row_factory='row') as db:
+                yield fname, db


+    for fname, db in connections():
for b, extra in _load_books(db):
books.add(b)
if extra is None:
@@ -536,7 +531,7 @@ def _iter_events_aux(limit=None, errors='throw') -> Iterator[Res[Event]]:
yield FinishedEvent(dt=dt, book=b, time_spent_s=extra.time_spent, eid=f'{b.content_id}-{fname.name}')

ET = EventTbl
-        for i, row in enumerate(db.query(f'SELECT {ET.EventType}, {ET.EventCount}, {ET.LastOccurrence}, {ET.ContentID}, {ET.Checksum}, hex({ET.ExtraData}) from Event')): # TODO order by?
+        for i, row in enumerate(db.execute(f'SELECT {ET.EventType}, {ET.EventCount}, {ET.LastOccurrence}, {ET.ContentID}, {ET.Checksum}, hex({ET.ExtraData}) from Event')): # TODO order by?
try:
yield from _iter_events_aux_Event(row=row, books=books, idx=i)
except Exception as e:
@@ -550,7 +545,7 @@ def _iter_events_aux(limit=None, errors='throw') -> Iterator[Res[Event]]:
AE = AnalyticsEvents
# events_table = db.load_table('AnalyticsEvents')
# TODO ugh. used to be events_table.all(), but started getting some 'Mandatory' field with a wrong schema at some point...
-        for row in db.query(f'SELECT {AE.Id}, {AE.Timestamp}, {AE.Type}, {AE.Attributes}, {AE.Metrics} from AnalyticsEvents'): # TODO order by??
+        for row in db.execute(f'SELECT {AE.Id}, {AE.Timestamp}, {AE.Type}, {AE.Attributes}, {AE.Metrics} from AnalyticsEvents'): # TODO order by??
try:
yield from _iter_events_aux_AnalyticsEvents(row=row, books=books)
except Exception as e:
@@ -839,18 +834,19 @@ def _iter_events_aux_AnalyticsEvents(*, row, books: Books) -> Iterator[Event]:
logger.warning(f'Unhandled entry of type {tp}: {row}')


-def _get_books():
+def _get_books() -> Books:
books = Books()
for bfile in DATABASES:
-        # TODO dispose?
-        db = dataset_connect_ro(bfile)
-        for b, _ in _load_books(db):
-            books.add(b)
+        with sqlite_connection(bfile, immutable=True, row_factory='row') as db:
+            for b, _ in _load_books(db):
+                books.add(b)
return books

-def get_books():
+def get_books() -> List[Book]:
return _get_books().all()


def _iter_highlights(**kwargs) -> Iterator[Highlight]:
logger = get_logger()
books = _get_books()
@@ -864,31 +860,31 @@
yielded.add(h)


-def _load_highlights(bfile: Path, books: Books):
+def _load_highlights(bfile: Path, books: Books) -> Iterator[Highlight]:
logger = get_logger()
logger.info(f"Using %s for highlights", bfile)
db = dataset_connect_ro(bfile)
for bm in db.query('SELECT * FROM Bookmark'):
volumeid = bm['VolumeID']
mbook = books.by_content_id(volumeid)
if mbook is None:
# sometimes Kobo seems to recycle old books without recycling the corresponding bookmarks
# so we need to be a bit defensive here
# see https://github.com/karlicoss/kobuddy/issues/18
book = books.make_orphan_book(volumeid=volumeid)
else:
book = mbook
# todo maybe in the future it could be a matter of error policy, i.e. throw vs yield exception vs use orphan object vs ignore
# could be example of useful defensiveness in a provider
yield Highlight(bm, book=book)
logger.info(f"Using {bfile} for highlights")
with sqlite_connection(bfile, immutable=True, row_factory='row') as db:
for bm in db.execute('SELECT * FROM Bookmark'):
volumeid = bm['VolumeID']
mbook = books.by_content_id(volumeid)
if mbook is None:
# sometimes Kobo seems to recycle old books without recycling the corresponding bookmarks
# so we need to be a bit defensive here
# see https://github.com/karlicoss/kobuddy/issues/18
book = books.make_orphan_book(volumeid=volumeid)
else:
book = mbook
# todo maybe in the future it could be a matter of error policy, i.e. throw vs yield exception vs use orphan object vs ignore
# could be example of useful defensiveness in a provider
yield Highlight(bm, book=book)


def _load_wordlist(bfile: Path):
logger = get_logger()
logger.info(f"Using %s for highlights", bfile)
db = dataset_connect_ro(bfile)
for bm in db.query('SELECT * FROM WordList'):
yield bm['Text']
logger.info(f"Using {bfile} for wordlist")
with sqlite_connection(bfile, immutable=True, row_factory='row') as db:
for bm in db.execute('SELECT * FROM WordList'):
yield bm['Text']


def get_highlights(**kwargs) -> List[Highlight]:
Expand Down Expand Up @@ -971,6 +967,7 @@ def _event_key(evt):
tie_breaker = 2
return (evt.dt, tie_breaker)


class BookEvents:
def __init__(self, book: Book, events):
assert all(e.book == book for e in events)
@@ -1011,10 +1008,12 @@ def get_books_with_events(**kwargs) -> Sequence[Res[BookEvents]]:
vit = sorted(vit, key=lambda be: be.last)
return list(chain(vit, eit))


def _fmt_dt(dt: datetime) -> str:
return dt.strftime('%d %b %Y %H:%M')

-def print_progress(full=True, **kwargs):
+def print_progress(full=True, **kwargs) -> None:
logger = get_logger()
for bevents in get_books_with_events(**kwargs):
if isinstance(bevents, Exception):
@@ -1032,12 +1031,12 @@ def print_progress(full=True, **kwargs):
print(f"-- {_fmt_dt(e.dt)}: {e.summary}")


-def print_books():
+def print_books() -> None:
for b in get_books():
print(b)


-def print_annotations():
+def print_annotations() -> None:
for i in get_highlights():
h = f"""
{_fmt_dt(i.dt)} {i._book}
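Aside (not part of the commit, just a sketch): the time_spent change above exists because the two libraries hand back different row types. dataset yielded plain dicts, so b.get('TimeSpentReading', 0) worked; sqlite3.Row supports keys() and mapping-style access but has no .get() method, hence the membership test. A minimal illustration, assuming an in-memory table where the optional column is absent:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.row_factory = sqlite3.Row  # equivalent of row_factory='row' in the new helper
conn.execute('CREATE TABLE content (Title TEXT)')
conn.execute("INSERT INTO content VALUES ('Some Book')")
b = conn.execute('SELECT * FROM content').fetchone()

title = b['Title']  # mapping-style access works fine
# b.get('TimeSpentReading', 0)  # AttributeError: sqlite3.Row has no .get()
time_spent = 0 if 'TimeSpentReading' not in b.keys() else b['TimeSpentReading']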
src/kobuddy/sqlite.py: 47 changes (47 additions & 0 deletions)
@@ -0,0 +1,47 @@
# copy from HPI:my/core/sqlite.py
from contextlib import contextmanager
from pathlib import Path
import sqlite3
from typing import Callable, Any, Union, Literal, Optional, Iterator


PathIsh = Union[Path, str]


SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]


def dict_factory(cursor, row):
fields = [column[0] for column in cursor.description]
return {key: value for key, value in zip(fields, row)}


Factory = Union[SqliteRowFactory, Literal['row', 'dict']]

@contextmanager
def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Optional[Factory]=None) -> Iterator[sqlite3.Connection]:
dbp = f'file:{db}'
# https://www.sqlite.org/draft/uri.html#uriimmutable
if immutable:
# assert results in nicer error than sqlite3.OperationalError
assert Path(db).exists(), db
dbp = f'{dbp}?immutable=1'
row_factory_: Any = None
if row_factory is not None:
if callable(row_factory):
row_factory_ = row_factory
elif row_factory == 'row':
row_factory_ = sqlite3.Row
elif row_factory == 'dict':
row_factory_ = dict_factory
else:
raise RuntimeError(f"Can't happen: {row_factory}")

conn = sqlite3.connect(dbp, uri=True)
try:
conn.row_factory = row_factory_
with conn:
yield conn
finally:
        # the Connection context manager doesn't close the connection, it only commits/rolls back the transaction
conn.close()
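For reference, a short usage sketch of the new helper (the database path here is hypothetical): open a Kobo database read-only and access columns by name.

from pathlib import Path
from kobuddy.sqlite import sqlite_connection

db = Path('KoboReader.sqlite')  # hypothetical path to a Kobo database backup
with sqlite_connection(db, immutable=True, row_factory='row') as conn:
    for row in conn.execute('SELECT Title FROM content WHERE ContentType=6'):
        print(row['Title'])

The immutable=1 URI flag tells SQLite the file won't change, so it works on read-only filesystems and guarantees the database can't be modified by accident, the same behaviour the removed dataset_connect_ro provided.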
