Skip to content

Commit

Permalink
Allow loading documents with no labels
Browse files (browse the repository at this point in the history)
Now ``Dataset.load_from_files_multilabel()`` can load documents with no
labels as well.
  • Loading branch information
sergioburdisso committed May 26, 2020
1 parent 030bb7b commit 31251f8
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions pyss3/util.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1800,6 +1800,7 @@ def load_from_files_multilabel(docs_path, labels_path, sep_label=None, sep_doc='
:type sep_doc: str
:returns: the (x_train, y_train) or (x_test, y_test) pairs.
:rtype: tuple
:raises: ValueError
"""
x_data = []
y_data = []
Expand All @@ -1816,11 +1817,20 @@ def load_from_files_multilabel(docs_path, labels_path, sep_label=None, sep_doc='
doc_labels = {}
doc_names = []

for doc_name, label in doc_labels_raw:
for i_doc_label, doc_label in enumerate(doc_labels_raw):
if len(doc_label) == 2:
doc_name, label = doc_label
elif len(doc_label) > 2:
doc_name, label = doc_label[0], " ".join(doc_label[1:])
else:
if doc_label[0] not in doc_labels:
doc_labels[doc_label[0]] = []
continue

if doc_name not in doc_labels:
doc_labels[doc_name] = [label]
doc_names.append(doc_name)
else:
elif label:
doc_labels[doc_name].append(label)
cat_info[label] += 1

Expand Down

0 comments on commit 31251f8

Please sign in to comment.