Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
fix BucketSentenceIter bug related to #11430 (#11580)
Browse files: browse the repository at this point in the history
* fix BucketSentenceIter bug

* add test case for smallest empty bucket
  • Loading branch information
Soonhwan-Kwon authored and szha committed Jul 18, 2018
1 parent 072dd5a commit 904052d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
7 changes: 6 additions & 1 deletion python/mxnet/rnn/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,20 @@ def __init__(self, sentences, batch_size, buckets=None, invalid_label=-1,

ndiscard = 0
self.data = [[] for _ in buckets]
valid_buckets = {}
for item in range(len(buckets)):
valid_buckets[item] = 0

for i, sent in enumerate(sentences):
buck = bisect.bisect_left(buckets, len(sent))
valid_buckets[buck] = 1
if buck == len(buckets):
ndiscard += 1
continue
buff = np.full((buckets[buck],), invalid_label, dtype=dtype)
buff[:len(sent)] = sent
self.data[buck].append(buff)

buckets = [j for i, j in enumerate(buckets) if valid_buckets[i] == 1]
self.data = [np.asarray(i, dtype=dtype) for i in self.data if i]

print("WARNING: discarded %d sentences longer than the largest bucket."%ndiscard)
Expand Down
4 changes: 2 additions & 2 deletions tests/python/train/test_bucketing.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_bucket_module():
num_embed = 25
num_layers = 2
len_vocab = 50
buckets = [10, 20, 30, 40]
buckets = [5, 10, 20, 30, 40]

invalid_label = -1
num_sentence = 1000
Expand All @@ -45,7 +45,7 @@ def test_bucket_module():
val_sent = []

for _ in range(num_sentence):
len_sentence = randint(1, max(buckets)-1) # leave out the two last buckets empty
len_sentence = randint(6, max(buckets)-1) # leave out the two last buckets empty
train_sentence = []
val_sentence = []
for _ in range(len_sentence):
Expand Down

0 comments on commit 904052d

Please sign in to comment.