Skip to content

Commit

Permalink
update BERTopic test
Browse files: browse the repository at this point in the history
Signed-off-by: Tim Schopf <tim.schopf@t-online.de>
  • Loading branch information
TimSchopf committed Apr 29, 2024
1 parent b1e1ff4 commit c3bc123
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 5 deletions.
2 changes: 1 addition & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ keybert>=0.5.0
flair==0.11.3
scipy==1.7.3
bertopic>=0.16.1
-datasets==2.13.2
+scikit-learn>=1.0.1
umap-learn==0.5.4
7 changes: 3 additions & 4 deletions tests/test_vectorizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import flair
import spacy
from bertopic import BERTopic
-from datasets import load_dataset
from flair.models import SequenceTagger
from flair.tokenization import SegtokSentenceSplitter
from keybert import KeyBERT
+from sklearn.datasets import fetch_20newsgroups

import tests.utils as utils
from keyphrase_vectorizers import KeyphraseCountVectorizer, KeyphraseTfidfVectorizer
Expand Down Expand Up @@ -172,9 +172,8 @@ def test_online_vectorizer():


def test_bertopic():
-data = load_dataset("ag_news")
-texts = data['train']['text']
-texts = texts[:100]
+data = fetch_20newsgroups(subset='train')
+texts = data.data[:100]
topic_model = BERTopic(vectorizer_model=KeyphraseCountVectorizer())
topics, probs = topic_model.fit_transform(documents=texts)
new_topics = topic_model.reduce_outliers(texts, topics)
Expand Down

0 comments on commit c3bc123

Please sign in to comment.