Skip to content

Commit

Permalink
Use the batch functionality in the movielens example (#659)
Browse files Browse the repository at this point in the history
Batch calls to similar_items in the movielens example script.
  • Loading branch information
benfred committed May 25, 2023
1 parent 3cb4532 commit 012e34d
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
4 changes: 2 additions & 2 deletions examples/lastfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def calculate_similar_artists(output_filename, model_name="als"):
artist = artists[artistid]
for other, score in zip(ids[i], scores[i]):
o.write(f"{artist}\t{artists[other]}\t{score}\n")
progress.update(batch_size)
progress.update(len(batch))

logging.debug("generated similar artists in %0.2fs", time.time() - start)

Expand Down Expand Up @@ -158,7 +158,7 @@ def calculate_recommendations(output_filename, model_name="als"):
username = users[userid]
for other, score in zip(ids[i], scores[i]):
o.write(f"{username}\t{artists[other]}\t{score}\n")
progress.update(batch_size)
progress.update(len(batch))
logging.debug("generated recommendations in %0.2fs", time.time() - start)


Expand Down
20 changes: 12 additions & 8 deletions examples/movielens.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,18 @@ def calculate_similar_movies(output_filename, model_name="als", min_rating=4.0,
log.debug("calculating similar movies")
with tqdm.tqdm(total=len(to_generate)) as progress:
with codecs.open(output_filename, "w", "utf8") as o:
for movieid in to_generate:
# if this movie has no ratings, skip over it (for instance, 'Graffiti Bridge' has
# no ratings > 4, meaning we've filtered out all data for it).
if ratings.indptr[movieid] != ratings.indptr[movieid + 1]:
title = titles[movieid]
for other, score in zip(*model.similar_items(movieid, 11)):
o.write(f"{title}\t{titles[other]}\t{score}\n")
progress.update(1)
batch_size = 1000
for startidx in range(0, len(to_generate), batch_size):
batch = to_generate[startidx : startidx + batch_size]
ids, scores = model.similar_items(batch, 11)
for i, movieid in enumerate(batch):
# if this movie has no ratings, skip over it (for instance, 'Graffiti Bridge' has
# no ratings > 4, meaning we've filtered out all data for it).
if ratings.indptr[movieid] != ratings.indptr[movieid + 1]:
title = titles[movieid]
for other, score in zip(ids[i], scores[i]):
o.write(f"{title}\t{titles[other]}\t{score}\n")
progress.update(len(batch))


if __name__ == "__main__":
Expand Down

0 comments on commit 012e34d

Please sign in to comment.