From 08c1e9973a8ff34b64effbdf712658012d539059 Mon Sep 17 00:00:00 2001
From: Edwin Onuonga <edwinonuonga@gmail.com>
Date: Sun, 26 Jun 2022 16:23:27 +0100
Subject: [PATCH 1/2] Re-enable GMMHMM tests with load_random_sequences data

---
 lib/test/lib/classifiers/hmm/test_gmmhmm.py | 167 ++++++++++----------
 1 file changed, 82 insertions(+), 85 deletions(-)

diff --git a/lib/test/lib/classifiers/hmm/test_gmmhmm.py b/lib/test/lib/classifiers/hmm/test_gmmhmm.py
index bf99992..ec95131 100644
--- a/lib/test/lib/classifiers/hmm/test_gmmhmm.py
+++ b/lib/test/lib/classifiers/hmm/test_gmmhmm.py
@@ -1,23 +1,23 @@
-import pytest, warnings, os, numpy as np, hmmlearn.base, hmmlearn.hmm
+import pytest, warnings, os, math, numpy as np, hmmlearn.base, hmmlearn.hmm
 from copy import deepcopy
 from sequentia.classifiers import GMMHMM, _LeftRightTopology, _ErgodicTopology, _LinearTopology
+from sequentia.datasets import load_random_sequences
 from ....support import assert_equal, assert_not_equal, assert_all_equal, assert_all_not_equal
 
-pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True)
+# pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True)
 
 # Set seed for reproducible randomness
-seed = 0
-np.random.seed(seed)
-rng = np.random.RandomState(seed)
+random_state = np.random.RandomState(0)
 
 # Create some sample data
-X = [rng.random((10 * i, 3)) for i in range(1, 4)]
-x = rng.random((15, 3))
+dataset = load_random_sequences(15, n_features=2, n_classes=2, length_range=(20, 30), random_state=random_state)
+X = [x for x, y in dataset if y == 0]
+x = X[0]
 
 # Unparameterized HMMs
-hmm_lr = GMMHMM(label='c1', n_states=5, topology='left-right', random_state=rng)
-hmm_e = GMMHMM(label='c1', n_states=5, topology='ergodic', random_state=rng)
-hmm_lin = GMMHMM(label='c1', n_states=5, topology='linear', random_state=rng)
+hmm_lr = GMMHMM(label='c1', n_states=5, topology='left-right', random_state=random_state)
+hmm_e = GMMHMM(label='c1', n_states=5, topology='ergodic', random_state=random_state)
+hmm_lin = GMMHMM(label='c1', n_states=5, topology='linear', random_state=random_state)
 
 # ========================================================= #
 # GMMHMM.set_uniform_initial() + GMMHMM.initial_ (property) #
@@ -56,7 +56,7 @@ def test_left_right_random_initial():
     hmm = deepcopy(hmm_lr)
     hmm.set_random_initial()
     assert_equal(hmm.initial_, np.array([
-        0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597
+        0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092
     ]))
 
 def test_ergodic_random_initial():
@@ -64,7 +64,7 @@ def test_ergodic_random_initial():
     hmm = deepcopy(hmm_e)
     hmm.set_random_initial()
     assert_equal(hmm.initial_, np.array([
-        0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597
+        0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092
     ]))
 
 def test_linear_random_initial():
@@ -72,7 +72,7 @@ def test_linear_random_initial():
     hmm = deepcopy(hmm_lin)
     hmm.set_random_initial()
     assert_equal(hmm.initial_, np.array([
-        0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597
+        0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092
     ]))
 
 # ================================================================= #
@@ -124,10 +124,10 @@ def test_left_right_random_transitions():
     hmm = deepcopy(hmm_lr)
     hmm.set_random_transitions()
     assert_equal(hmm.transitions_, np.array([
-        [0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597],
-        [0.        , 0.22725263, 0.18611702, 0.56646299, 0.02016736],
-        [0.        , 0.        , 0.18542075, 0.44084593, 0.37373332],
-        [0.        , 0.        , 0.        , 0.65696153, 0.34303847],
+        [0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092],
+        [0.        , 0.0544546 , 0.167254  , 0.43679272, 0.34149867],
+        [0.        , 0.        , 0.02569653, 0.93686415, 0.03743932],
+        [0.        , 0.        , 0.        , 0.80245882, 0.19754118],
         [0.        , 0.        , 0.        , 0.        , 1.        ]
     ]))
 
@@ -136,11 +136,11 @@ def test_ergodic_random_transitions():
     hmm = deepcopy(hmm_e)
     hmm.set_random_transitions()
     assert_equal(hmm.transitions_, np.array([
-        [0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597],
-        [0.19252534, 0.15767581, 0.47989976, 0.01708551, 0.15281357],
-        [0.19375092, 0.16425506, 0.21828034, 0.11397708, 0.30973661],
-        [0.46906977, 0.02941216, 0.17137502, 0.0333193 , 0.29682374],
-        [0.21312406, 0.35221103, 0.08556524, 0.06613143, 0.28296824]
+        [0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092],
+        [0.05407134, 0.16607684, 0.43371851, 0.33909515, 0.00703816],
+        [0.12935118, 0.00516918, 0.67173292, 0.16536041, 0.02838631],
+        [0.15768347, 0.31907791, 0.42873228, 0.06083948, 0.03366686],
+        [0.42607069, 0.17697038, 0.33288653, 0.04212738, 0.02194502]
     ]))
 
 def test_linear_random_transitions():
@@ -148,10 +148,10 @@ def test_linear_random_transitions():
     hmm = deepcopy(hmm_lin)
     hmm.set_random_transitions()
     assert_equal(hmm.transitions_, np.array([
-        [0.72413873, 0.27586127, 0.        , 0.        , 0.        ],
-        [0.        , 0.07615418, 0.92384582, 0.        , 0.        ],
-        [0.        , 0.        , 0.81752797, 0.18247203, 0.        ],
-        [0.        , 0.        , 0.        , 0.24730529, 0.75269471],
+        [0.81263954, 0.18736046, 0.        , 0.        , 0.        ],
+        [0.        , 0.30529464, 0.69470536, 0.        , 0.        ],
+        [0.        , 0.        , 0.34435856, 0.65564144, 0.        ],
+        [0.        , 0.        , 0.        , 0.27688918, 0.72311082],
         [0.        , 0.        , 0.        , 0.        , 1.        ]
     ]))
 
@@ -188,7 +188,7 @@ def test_fit_sets_internals():
     hmm.set_uniform_initial()
     hmm.set_uniform_transitions()
     hmm.fit(X)
-    assert hmm.n_seqs_ == 3
+    assert hmm.n_seqs_ == 8
     assert isinstance(hmm.model, hmmlearn.hmm.GMMHMM)
 
 def test_left_right_fit_updates_uniform_initial():
@@ -210,7 +210,7 @@ def test_left_right_fit_updates_random_initial():
     hmm.set_random_transitions()
     before = hmm.initial_
     assert_equal(before, np.array([
-        0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597
+        0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092
     ]))
     hmm.fit(X)
     assert_not_equal(before, hmm.initial_)
@@ -238,10 +238,10 @@ def test_left_right_fit_updates_random_transitions():
     hmm.set_random_transitions()
     before = hmm.transitions_
     assert_equal(before, np.array([
-        [0.19252534, 0.15767581, 0.47989976, 0.01708551, 0.15281357],
-        [0.        , 0.28069128, 0.23795997, 0.31622761, 0.16512114],
-        [0.        , 0.        , 0.29431489, 0.66404724, 0.04163787],
-        [0.        , 0.        , 0.        , 0.8372241 , 0.1627759 ],
+        [0.05407134, 0.16607684, 0.43371851, 0.33909515, 0.00703816],
+        [0.        , 0.13313025, 0.0053202 , 0.69135803, 0.17019152],
+        [0.        , 0.        , 0.11443295, 0.29289135, 0.59267569],
+        [0.        , 0.        , 0.        , 0.87572918, 0.12427082],
         [0.        , 0.        , 0.        , 0.        , 1.        ]
     ]))
     hmm.fit(X)
@@ -266,7 +266,7 @@ def test_ergodic_fit_updates_random_initial():
     hmm.set_random_transitions()
     before = hmm.initial_
     assert_equal(before, np.array([
-        0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597
+        0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092
     ]))
     hmm.fit(X)
     assert_not_equal(before, hmm.initial_)
@@ -294,11 +294,12 @@ def test_ergodic_fit_updates_random_transitions():
     hmm.set_random_transitions()
     before = hmm.transitions_
     assert_equal(before, np.array([
-        [0.19252534, 0.15767581, 0.47989976, 0.01708551, 0.15281357],
-        [0.19375092, 0.16425506, 0.21828034, 0.11397708, 0.30973661],
-        [0.46906977, 0.02941216, 0.17137502, 0.0333193 , 0.29682374],
-        [0.21312406, 0.35221103, 0.08556524, 0.06613143, 0.28296824],
-        [0.05212313, 0.3345513 , 0.17192948, 0.16379392, 0.27760217]]))
+        [0.05407134, 0.16607684, 0.43371851, 0.33909515, 0.00703816],
+        [0.12935118, 0.00516918, 0.67173292, 0.16536041, 0.02838631],
+        [0.15768347, 0.31907791, 0.42873228, 0.06083948, 0.03366686],
+        [0.42607069, 0.17697038, 0.33288653, 0.04212738, 0.02194502],
+        [0.20328414, 0.13729798, 0.03560389, 0.4874536 , 0.13636039]
+    ]))
     hmm.fit(X)
     assert_not_equal(before, hmm.transitions_)
 
@@ -321,7 +322,7 @@ def test_linear_fit_updates_random_initial():
     hmm.set_random_transitions()
     before = hmm.initial_
     assert_equal(before, np.array([
-        0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597
+        0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092
     ]))
     hmm.fit(X)
     assert_not_equal(before, hmm.initial_)
@@ -348,11 +349,12 @@ def test_linear_fit_updates_random_transitions():
     hmm.set_random_initial()
     hmm.set_random_transitions()
     before = hmm.transitions_
+    # Seeded linear-topology transition matrix expected before fitting
     assert_equal(before, np.array([
-        [0.54975645, 0.45024355, 0.        , 0.        , 0.        ],
-        [0.        , 0.96562169, 0.03437831, 0.        , 0.        ],
-        [0.        , 0.        , 0.29607315, 0.70392685, 0.        ],
-        [0.        , 0.        , 0.        , 0.42938524, 0.57061476],
+        [0.24561338, 0.75438662, 0.        , 0.        , 0.        ],
+        [0.        , 0.56122003, 0.43877997, 0.        , 0.        ],
+        [0.        , 0.        , 0.02669601, 0.97330399, 0.        ],
+        [0.        , 0.        , 0.        , 0.00763653, 0.99236347],
         [0.        , 0.        , 0.        , 0.        , 1.        ]
     ]))
     hmm.fit(X)
@@ -377,7 +379,7 @@ def test_left_right_forward():
     hmm.set_random_initial()
     hmm.set_random_transitions()
     hmm.fit(X)
-    assert isinstance(hmm.forward(x), float)
+    assert math.isclose(hmm.forward(x), -89.59052551245605)
 
 def test_ergodic_forward():
     """Forward algorithm on an ergodic HMM"""
@@ -385,7 +387,7 @@ def test_ergodic_forward():
     hmm.set_random_initial()
     hmm.set_random_transitions()
     hmm.fit(X)
-    assert isinstance(hmm.forward(x), float)
+    assert math.isclose(hmm.forward(x), -97.67911812603418)
 
 def test_linear_forward():
     """Forward algorithm on a linear HMM"""
@@ -393,7 +395,7 @@ def test_linear_forward():
     hmm.set_random_initial()
     hmm.set_random_transitions()
     hmm.fit(X)
-    assert isinstance(hmm.forward(x), float)
+    assert math.isclose(hmm.forward(x), -90.25666060143605)
 
 # =============== #
 # GMMHMM.freeze() #
@@ -574,7 +576,7 @@ def test_n_seqs_with_fit():
     hmm.set_random_initial()
     hmm.set_random_transitions()
     hmm.fit(X)
-    assert hmm.n_seqs_ == 3
+    assert hmm.n_seqs_ == 8
 
 # ======================== #
 # GMMHMM.frozen (property) #
@@ -653,11 +655,11 @@ def test_means_with_fit():
     hmm.set_random_transitions()
     hmm.fit(X)
     assert_equal(hmm.means_, np.array([
-        [[0.31874666, 0.66724147, 0.13182087]],
-        [[0.31856896, 0.66741038, 0.13179786]],
-        [[0.71632403, 0.28939952, 0.18320713]],
-        [[0.51787902, 0.57561888, 0.5995548 ]],
-        [[0.66975947, 0.26867588, 0.25477769]]
+        [[ 0.49517361,  0.79670013]],
+        [[ 1.81277369, -2.45995611]],
+        [[-0.61198527, -0.2621587 ]],
+        [[ 1.40168717,  0.16718235]],
+        [[-2.05338535, -3.13926956]]
     ]))
 
 # ========================= #
@@ -680,21 +682,16 @@ def test_covars_with_fit():
     hmm.set_random_transitions()
     hmm.fit(X)
     assert_equal(hmm.covars_, np.array([
-        [[[ 0.08307002,  0.00160875,  0.0157381 ],
-          [ 0.00160875,  0.08735411, -0.01063379],
-          [ 0.0157381 , -0.01063379,  0.08286247]]],
-        [[[ 0.08307002,  0.00160875,  0.0157381 ],
-          [ 0.00160875,  0.08735411, -0.01063379],
-          [ 0.0157381 , -0.01063379,  0.08286247]]],
-        [[[ 0.08307002,  0.00160875,  0.0157381 ],
-          [ 0.00160875,  0.08735411, -0.01063379],
-          [ 0.0157381 , -0.01063379,  0.08286247]]],
-        [[[ 0.08307002,  0.00160875,  0.0157381 ],
-          [ 0.00160875,  0.08735411, -0.01063379],
-          [ 0.0157381 , -0.01063379,  0.08286247]]],
-        [[[ 0.08307002,  0.00160875,  0.0157381 ],
-          [ 0.00160875,  0.08735411, -0.01063379],
-          [ 0.0157381 , -0.01063379,  0.08286247]]]
+        [[[ 1.38488559,  0.38570541],
+          [ 0.38570541,  0.63293189]]],
+        [[[ 1.73706667,  0.28568952],
+          [ 0.28568952,  0.47176263]]],
+        [[[ 0.99011246, -0.10938155],
+          [-0.10938155,  0.01847633]]],
+        [[[ 1.94877336, -0.17877102],
+          [-0.17877102,  2.42880862]]],
+        [[[ 3.52820275,  0.5571457 ],
+          [ 0.5571457 ,  0.2716629 ]]]
     ]))
 
 # ========================== #
@@ -726,7 +723,7 @@ def test_transitions_without_setting():
 def test_left_right_initial_left_right():
     """Set an initial state distribution generated by a left-right topology on a left-right HMM"""
     hmm = deepcopy(hmm_lr)
-    topology = _LeftRightTopology(n_states=5, random_state=rng)
+    topology = _LeftRightTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -734,7 +731,7 @@ def test_left_right_initial_left_right():
 def test_left_right_initial_ergodic():
     """Set an initial state distribution generated by a left-right topology on an ergodic HMM"""
     hmm = deepcopy(hmm_lr)
-    topology = _ErgodicTopology(n_states=5, random_state=rng)
+    topology = _ErgodicTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -742,7 +739,7 @@ def test_left_right_initial_ergodic():
 def test_left_right_initial_linear():
     """Set an initial state distribution generated by a left-right topology on an linear HMM"""
     hmm = deepcopy(hmm_lr)
-    topology = _LinearTopology(n_states=5, random_state=rng)
+    topology = _LinearTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -750,7 +747,7 @@ def test_left_right_initial_linear():
 def test_ergodic_initial_left_right():
     """Set an initial state distribution generated by an ergodic topology on a left-right HMM"""
     hmm = deepcopy(hmm_e)
-    topology = _LeftRightTopology(n_states=5, random_state=rng)
+    topology = _LeftRightTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -758,7 +755,7 @@ def test_ergodic_initial_left_right():
 def test_ergodic_initial_ergodic():
     """Set an initial state distribution generated by an ergodic topology on an ergodic HMM"""
     hmm = deepcopy(hmm_e)
-    topology = _ErgodicTopology(n_states=5, random_state=rng)
+    topology = _ErgodicTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -766,7 +763,7 @@ def test_ergodic_initial_ergodic():
 def test_ergodic_initial_linear():
     """Set an initial state distribution generated by an ergodic topology on a linear HMM"""
     hmm = deepcopy(hmm_e)
-    topology = _LinearTopology(n_states=5, random_state=rng)
+    topology = _LinearTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -774,7 +771,7 @@ def test_ergodic_initial_linear():
 def test_linear_initial_left_right():
     """Set an initial state distribution generated by a linear topology on a left-right HMM"""
     hmm = deepcopy(hmm_lin)
-    topology = _LeftRightTopology(n_states=5, random_state=rng)
+    topology = _LeftRightTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -782,7 +779,7 @@ def test_linear_initial_left_right():
 def test_linear_initial_ergodic():
     """Set an initial state distribution generated by a linear topology on an ergodic HMM"""
     hmm = deepcopy(hmm_lin)
-    topology = _ErgodicTopology(n_states=5, random_state=rng)
+    topology = _ErgodicTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -790,7 +787,7 @@ def test_linear_initial_ergodic():
 def test_linear_initial_linear():
     """Set an initial state distribution generated by a linear topology on an linear HMM"""
     hmm = deepcopy(hmm_lin)
-    topology = _LinearTopology(n_states=5, random_state=rng)
+    topology = _LinearTopology(n_states=5, random_state=random_state)
     initial = topology.random_initial()
     hmm.initial_ = initial
     assert_equal(hmm.initial_, initial)
@@ -802,7 +799,7 @@ def test_linear_initial_linear():
 def test_left_right_transitions_left_right():
     """Set a transition matrix generated by a left-right topology on a left-right HMM"""
     hmm = deepcopy(hmm_lr)
-    topology = _LeftRightTopology(n_states=5, random_state=rng)
+    topology = _LeftRightTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     hmm.transitions_ = transitions
     assert_equal(hmm.transitions_, transitions)
@@ -810,7 +807,7 @@ def test_left_right_transitions_left_right():
 def test_left_right_transitions_ergodic():
     """Set a transition matrix generated by a left-right topology on an ergodic HMM"""
     hmm = deepcopy(hmm_lr)
-    topology = _ErgodicTopology(n_states=5, random_state=rng)
+    topology = _ErgodicTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     with pytest.raises(ValueError) as e:
         hmm.transitions_ = transitions
@@ -819,7 +816,7 @@ def test_left_right_transitions_ergodic():
 def test_left_right_transitions_linear():
     """Set a transition matrix generated by a left-right topology on a linear HMM"""
     hmm = deepcopy(hmm_lr)
-    topology = _LinearTopology(n_states=5, random_state=rng)
+    topology = _LinearTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     hmm.transitions_ = transitions
     assert_equal(hmm.transitions_, transitions)
@@ -827,7 +824,7 @@ def test_left_right_transitions_linear():
 def test_ergodic_transitions_left_right():
     """Set a transition matrix generated by an ergodic topology on a left-right HMM"""
     hmm = deepcopy(hmm_e)
-    topology = _LeftRightTopology(n_states=5, random_state=rng)
+    topology = _LeftRightTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     with pytest.warns(UserWarning) as w:
         hmm.transitions_ = transitions
@@ -837,7 +834,7 @@ def test_ergodic_transitions_left_right():
 def test_ergodic_transitions_ergodic():
     """Set a transition matrix generated by an ergodic topology on an ergodic HMM"""
     hmm = deepcopy(hmm_e)
-    topology = _ErgodicTopology(n_states=5, random_state=rng)
+    topology = _ErgodicTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     hmm.transitions_ = transitions
     assert_equal(hmm.transitions_, transitions)
@@ -845,7 +842,7 @@ def test_ergodic_transitions_ergodic():
 def test_ergodic_transitions_linear():
     """Set a transition matrix generated by an ergodic topology on a linear HMM"""
     hmm = deepcopy(hmm_e)
-    topology = _LinearTopology(n_states=5, random_state=rng)
+    topology = _LinearTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     with pytest.warns(UserWarning) as w:
         hmm.transitions_ = transitions
@@ -855,7 +852,7 @@ def test_ergodic_transitions_linear():
 def test_linear_transitions_left_right():
     """Set a transition matrix generated by a linear topology on a left-right HMM"""
     hmm = deepcopy(hmm_lin)
-    topology = _LeftRightTopology(n_states=5, random_state=rng)
+    topology = _LeftRightTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     with pytest.raises(ValueError) as e:
         hmm.transitions_ = transitions
@@ -864,7 +861,7 @@ def test_linear_transitions_left_right():
 def test_linear_transitions_ergodic():
     """Set a transition matrix generated by a linear topology on an ergodic HMM"""
     hmm = deepcopy(hmm_lin)
-    topology = _ErgodicTopology(n_states=5, random_state=rng)
+    topology = _ErgodicTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     with pytest.raises(ValueError) as e:
         hmm.transitions_ = transitions
@@ -873,7 +870,7 @@ def test_linear_transitions_ergodic():
 def test_linear_transitions_linear():
     """Set a transition matrix generated by a linear topology on a linear HMM"""
     hmm = deepcopy(hmm_lin)
-    topology = _LinearTopology(n_states=5, random_state=rng)
+    topology = _LinearTopology(n_states=5, random_state=random_state)
     transitions = topology.random_transitions()
     hmm.transitions_ = transitions
     assert_equal(hmm.transitions_, transitions)
\ No newline at end of file

From 9def4b63440feac3cd8da20237593ab0c3efa8da Mon Sep 17 00:00:00 2001
From: Edwin Onuonga <edwinonuonga@gmail.com>
Date: Sun, 26 Jun 2022 17:31:36 +0100
Subject: [PATCH 2/2] Re-enable HMMClassifier tests on random sequence data

---
 lib/test/lib/classifiers/hmm/test_gmmhmm.py   |   2 -
 .../classifiers/hmm/test_hmm_classifier.py    | 163 +++++++++++-------
 2 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/lib/test/lib/classifiers/hmm/test_gmmhmm.py b/lib/test/lib/classifiers/hmm/test_gmmhmm.py
index ec95131..e148337 100644
--- a/lib/test/lib/classifiers/hmm/test_gmmhmm.py
+++ b/lib/test/lib/classifiers/hmm/test_gmmhmm.py
@@ -4,8 +4,6 @@
 from sequentia.datasets import load_random_sequences
 from ....support import assert_equal, assert_not_equal, assert_all_equal, assert_all_not_equal
 
-# pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True)
-
 # Set seed for reproducible randomness
 random_state = np.random.RandomState(0)
 
diff --git a/lib/test/lib/classifiers/hmm/test_hmm_classifier.py b/lib/test/lib/classifiers/hmm/test_hmm_classifier.py
index 8fba993..72249c1 100644
--- a/lib/test/lib/classifiers/hmm/test_hmm_classifier.py
+++ b/lib/test/lib/classifiers/hmm/test_hmm_classifier.py
@@ -1,32 +1,29 @@
 import os, pickle, pytest, warnings, os, numpy as np, hmmlearn.hmm
 from copy import deepcopy
 from sequentia.classifiers import GMMHMM, HMMClassifier, _ErgodicTopology
+from sequentia.datasets import load_random_sequences
 from ....support import assert_equal, assert_not_equal
 
-pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True)
+# pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True)
 
 # Set seed for reproducible randomness
-seed = 0
-np.random.seed(seed)
-rng = np.random.RandomState(seed)
+random_state = np.random.RandomState(0)
 
-# Set of possible labels
-labels = ['c{}'.format(i) for i in range(5)]
+# Create some sample data
+dataset = load_random_sequences(50, n_features=2, n_classes=5, length_range=(10, 30), random_state=random_state)
+dataset.classes = [f'c{i}' for i in range(5)]
+dataset.y = np.array([f'c{i}' for i in dataset.y])
+x, y = dataset[0]
 
-# Create and fit some sample HMMs
+# Create and fit some HMMs
 hmms = []
-for i, label in enumerate(labels):
-    hmm = GMMHMM(label=label, n_states=(i + 3), random_state=rng)
+for sequences, label in dataset.iter_by_class():
+    hmm = GMMHMM(label=label, n_states=5, random_state=random_state)
     hmm.set_random_initial()
     hmm.set_random_transitions()
-    hmm.fit([np.arange((i + j * 20) * 30).reshape(-1, 3) for j in range(2, 5)])
+    hmm.fit(sequences)
     hmms.append(hmm)
 
-# Create some sample test data and labels
-X = [np.arange((i + 2 * 20) * 30).reshape(-1, 3) for i in range(2, 5)]
-Y = ['c0', 'c1', 'c1']
-x, y = X[0], 'c1'
-
 # Fit a classifier
 hmm_clf = HMMClassifier()
 hmm_clf.fit(hmms)
@@ -62,81 +59,119 @@ def test_fit_list_invalid():
 def test_predict_single_frequency_prior():
     """Predict a single observation sequence with a frequency prior"""
     prediction = hmm_clf.predict(x, prior='frequency', return_scores=False, original_labels=False)
-    assert prediction == 0
+    assert prediction == 1
 
 def test_predict_single_uniform_prior():
     """Predict a single observation sequence with a uniform prior"""
     prediction = hmm_clf.predict(x, prior='uniform', return_scores=False, original_labels=False)
-    assert prediction == 0
+    assert prediction == 1
 
 def test_predict_single_custom_prior():
     """Predict a single observation sequence with a custom prior"""
     prediction = hmm_clf.predict(x, prior=([1e-50]*4+[1-4e-50]), return_scores=False, original_labels=False)
-    assert prediction == 4
+    assert prediction == 1
 
 def test_predict_single_return_scores():
     """Predict a single observation sequence and return the transformed label, with the un-normalized posterior scores"""
     prediction = hmm_clf.predict(x, prior='frequency', return_scores=True, original_labels=False)
     assert isinstance(prediction, tuple)
-    assert prediction[0] == 0
+    assert prediction[0] == 1
     assert_equal(prediction[1], np.array([
-        -1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844
+        -131.46105165, -78.80931343, -99.35179093, -90.89464994, -483.92229446
     ]))
 
 def test_predict_single_original_labels():
     """Predict a single observation sequence and return the original label, without the un-normalized posterior scores"""
     prediction = hmm_clf.predict(x, prior='uniform', return_scores=False, original_labels=True)
-    assert prediction == 'c0'
+    assert prediction == 'c1'
 
 def test_predict_single_return_scores_original_labels():
     """Predict a single observation sequence and return the original label, with the un-normalized posterior scores"""
     prediction = hmm_clf.predict(x, prior='frequency', return_scores=True, original_labels=True)
     assert isinstance(prediction, tuple)
-    assert prediction[0] == 'c0'
+    assert prediction[0] == 'c1'
     assert_equal(prediction[1], np.array([
-        -1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844
+        -131.46105165, -78.80931343, -99.35179093, -90.89464994, -483.92229446
     ]))
 
 def test_predict_multiple_frequency_prior():
     """Predict multiple observation sequences with a frequency prior"""
-    predictions = hmm_clf.predict(X, prior='frequency', return_scores=False, original_labels=False)
-    assert_equal(predictions, np.array([0, 0, 0]))
+    predictions = hmm_clf.predict(dataset.X, prior='frequency', return_scores=False, original_labels=False)
+    assert_equal(predictions, np.array([
+        1, 3, 1, 0, 1, 3, 1, 1, 3, 2, 1, 2, 3, 2, 4, 1, 3, 2, 0, 0, 1, 1,
+        3, 1, 1, 3, 1, 1, 1, 1, 4, 3, 0, 3, 1, 2, 3, 1, 2, 1, 1, 1, 3, 3,
+        2, 3, 1, 1, 4, 1
+    ]))
 
 def test_predict_multiple_uniform_prior():
     """Predict multiple observation sequences with a uniform prior"""
-    predictions = hmm_clf.predict(X, prior='uniform', return_scores=False, original_labels=False)
-    assert_equal(predictions, np.array([0, 0, 0]))
+    predictions = hmm_clf.predict(dataset.X, prior='uniform', return_scores=False, original_labels=False)
+    assert_equal(predictions, np.array([
+        1, 3, 1, 0, 1, 3, 1, 1, 3, 2, 1, 2, 3, 2, 4, 1, 3, 2, 0, 0, 1, 1,
+        3, 1, 1, 3, 1, 1, 1, 1, 4, 3, 0, 3, 1, 2, 3, 1, 2, 1, 1, 1, 3, 3,
+        2, 3, 1, 1, 4, 1
+    ]))
 
 def test_predict_multiple_custom_prior():
     """Predict multiple observation sequences with a custom prior"""
-    predictions = hmm_clf.predict(X, prior=([1-4e-50]+[1e-50]*4), return_scores=False, original_labels=False)
-    assert_equal(predictions, np.array([0, 0, 0]))
+    predictions = hmm_clf.predict(dataset.X, prior=([1-4e-50]+[1e-50]*4), return_scores=False, original_labels=False)
+    assert_equal(predictions, np.array([
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0
+    ]))
 
 def test_predict_multiple_return_scores():
     """Predict multiple observation sequences and return the transformed labels, with the un-normalized posterior scores"""
-    predictions = hmm_clf.predict(X, prior='frequency', return_scores=True, original_labels=False)
+    predictions = hmm_clf.predict(dataset.X, prior='frequency', return_scores=True, original_labels=False)
     assert isinstance(predictions, tuple)
-    assert_equal(predictions[0], np.array([0, 0, 0]))
-    assert_equal(predictions[1], np.array([
-        [-1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844],
-        [-1254.2158035 , -1299.37586652, -1299.75108935, -1255.8359274 , -1308.71071239],
-        [-1282.57116414, -1330.90436081, -1331.63379359, -1284.79130597, -1342.45717804]
+    assert_equal(predictions[0], np.array([
+        1, 3, 1, 0, 1, 3, 1, 1, 3, 2, 1, 2, 3, 2, 4, 1, 3, 2, 0, 0, 1, 1,
+        3, 1, 1, 3, 1, 1, 1, 1, 4, 3, 0, 3, 1, 2, 3, 1, 2, 1, 1, 1, 3, 3,
+        2, 3, 1, 1, 4, 1
+    ]))
+    assert_equal(predictions[1][:5], np.array([
+        [-131.46105165,  -78.80931343,  -99.35179093,  -90.89464994, -483.92229446],
+        [ -91.58935678,  -66.6556658 ,  -91.46883547,  -65.69934269, -716.797869  ],
+        [ -97.5230626 ,  -74.50878143,  -99.1544397 ,  -76.48361176, -690.2988915 ],
+        [  14.24986519,  -44.85298283,  -41.50143234,  -40.50844881, -148.67734234],
+        [ -95.11368472,  -40.81069058,  -59.46841129,  -52.60034218, -430.36823963]
     ]))
 
 def test_predict_multiple_original_labels():
     """Predict multiple observation sequences and return the original labels, without the un-normalized posterior scores"""
-    predictions = hmm_clf.predict(X, prior='frequency', return_scores=False, original_labels=True)
-    assert all(np.equal(predictions.astype(object), np.array(['c0', 'c0', 'c0'], dtype=object)))
+    predictions = hmm_clf.predict(dataset.X, prior='frequency', return_scores=False, original_labels=True)
+    assert all(np.equal(
+        predictions.astype(object),
+        np.array([
+            'c1', 'c3', 'c1', 'c0', 'c1', 'c3', 'c1', 'c1', 'c3', 'c2', 'c1',
+            'c2', 'c3', 'c2', 'c4', 'c1', 'c3', 'c2', 'c0', 'c0', 'c1', 'c1',
+            'c3', 'c1', 'c1', 'c3', 'c1', 'c1', 'c1', 'c1', 'c4', 'c3', 'c0',
+            'c3', 'c1', 'c2', 'c3', 'c1', 'c2', 'c1', 'c1', 'c1', 'c3', 'c3',
+            'c2', 'c3', 'c1', 'c1', 'c4', 'c1'
+        ], dtype=object)
+    ))
 
 def test_predict_multiple_return_scores_original_labels():
     """Predict multiple observation sequences and return the original labels, with the un-normalized posterior scores"""
-    predictions = hmm_clf.predict(X, prior='frequency', return_scores=True, original_labels=True)
+    predictions = hmm_clf.predict(dataset.X, prior='frequency', return_scores=True, original_labels=True)
     assert isinstance(predictions, tuple)
-    assert all(np.equal(predictions[0].astype(object), np.array(['c0', 'c0', 'c0'], dtype=object)))
-    assert_equal(predictions[1], np.array([
-        [-1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844],
-        [-1254.2158035 , -1299.37586652, -1299.75108935, -1255.8359274 , -1308.71071239],
-        [-1282.57116414, -1330.90436081, -1331.63379359, -1284.79130597, -1342.45717804]
+    assert all(np.equal(
+        predictions[0].astype(object),
+        np.array([
+            'c1', 'c3', 'c1', 'c0', 'c1', 'c3', 'c1', 'c1', 'c3', 'c2', 'c1',
+            'c2', 'c3', 'c2', 'c4', 'c1', 'c3', 'c2', 'c0', 'c0', 'c1', 'c1',
+            'c3', 'c1', 'c1', 'c3', 'c1', 'c1', 'c1', 'c1', 'c4', 'c3', 'c0',
+            'c3', 'c1', 'c2', 'c3', 'c1', 'c2', 'c1', 'c1', 'c1', 'c3', 'c3',
+            'c2', 'c3', 'c1', 'c1', 'c4', 'c1'
+        ], dtype=object)
+    ))
+    assert_equal(predictions[1][:5], np.array([
+        [-131.46105165,  -78.80931343,  -99.35179093,  -90.89464994, -483.92229446],
+        [ -91.58935678,  -66.6556658 ,  -91.46883547,  -65.69934269, -716.797869  ],
+        [ -97.5230626 ,  -74.50878143,  -99.1544397 ,  -76.48361176, -690.2988915 ],
+        [  14.24986519,  -44.85298283,  -41.50143234,  -40.50844881, -148.67734234],
+        [ -95.11368472,  -40.81069058,  -59.46841129,  -52.60034218, -430.36823963]
     ]))
 
 # ======================== #
@@ -145,14 +180,15 @@ def test_predict_multiple_return_scores_original_labels():
 
 def test_evaluate():
     """Evaluate performance on some observation sequences and labels"""
-    acc, cm = hmm_clf.evaluate(X, Y, prior='frequency')
-    assert acc == 1 / 3
+    acc, cm = hmm_clf.evaluate(dataset.X, dataset.y, prior='frequency')
+    # 46 of the 50 sequences fall on the diagonal of the expected matrix (46 / 50 = 0.92)
+    assert np.isclose(acc, 0.92)
     assert_equal(cm, np.array([
-        [1, 0, 0, 0, 0],
-        [2, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0]
+        [ 4,  0,  0,  0,  0],
+        [ 0, 20,  0,  1,  0],
+        [ 0,  1,  7,  0,  0],
+        [ 0,  2,  0, 12,  0],
+        [ 0,  0,  0,  0,  3]
     ]))
 
 # ==================== #
@@ -199,15 +235,26 @@ def test_load_valid():
         # Check that all fields are still the same
         assert isinstance(clf, HMMClassifier)
         assert all(isinstance(model, GMMHMM) for model in clf.models_)
-        assert [model.label for model in clf.models_] == labels
-        assert list(clf.encoder_.classes_) == labels
-        predictions = clf.predict(X, prior='frequency', return_scores=True, original_labels=True)
+        assert [model.label for model in clf.models_] == dataset.classes
+        assert list(clf.encoder_.classes_) == dataset.classes
+        predictions = clf.predict(dataset.X, prior='frequency', return_scores=True, original_labels=True)
         assert isinstance(predictions, tuple)
-        assert all(np.equal(predictions[0].astype(object), np.array(['c0', 'c0', 'c0'], dtype=object)))
-        assert_equal(predictions[1], np.array([
-            [-1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844],
-            [-1254.2158035 , -1299.37586652, -1299.75108935, -1255.8359274 , -1308.71071239],
-            [-1282.57116414, -1330.90436081, -1331.63379359, -1284.79130597, -1342.45717804]
+        assert all(np.equal(
+            predictions[0].astype(object),
+            np.array([
+                'c1', 'c3', 'c1', 'c0', 'c1', 'c3', 'c1', 'c1', 'c3', 'c2', 'c1',
+                'c2', 'c3', 'c2', 'c4', 'c1', 'c3', 'c2', 'c0', 'c0', 'c1', 'c1',
+                'c3', 'c1', 'c1', 'c3', 'c1', 'c1', 'c1', 'c1', 'c4', 'c3', 'c0',
+                'c3', 'c1', 'c2', 'c3', 'c1', 'c2', 'c1', 'c1', 'c1', 'c3', 'c3',
+                'c2', 'c3', 'c1', 'c1', 'c4', 'c1'
+            ], dtype=object)
+        ))
+        assert_equal(predictions[1][:5], np.array([
+            [-131.46105165,  -78.80931343,  -99.35179093,  -90.89464994, -483.92229446],
+            [ -91.58935678,  -66.6556658 ,  -91.46883547,  -65.69934269, -716.797869  ],
+            [ -97.5230626 ,  -74.50878143,  -99.1544397 ,  -76.48361176, -690.2988915 ],
+            [  14.24986519,  -44.85298283,  -41.50143234,  -40.50844881, -148.67734234],
+            [ -95.11368472,  -40.81069058,  -59.46841129,  -52.60034218, -430.36823963]
         ]))
     finally:
         os.remove('test.pkl')
\ No newline at end of file