From 08c1e9973a8ff34b64effbdf712658012d539059 Mon Sep 17 00:00:00 2001 From: Edwin Onuonga Date: Sun, 26 Jun 2022 16:23:27 +0100 Subject: [PATCH 1/2] Update test_gmmhmm.py --- lib/test/lib/classifiers/hmm/test_gmmhmm.py | 167 ++++++++++---------- 1 file changed, 82 insertions(+), 85 deletions(-) diff --git a/lib/test/lib/classifiers/hmm/test_gmmhmm.py b/lib/test/lib/classifiers/hmm/test_gmmhmm.py index bf99992..ec95131 100644 --- a/lib/test/lib/classifiers/hmm/test_gmmhmm.py +++ b/lib/test/lib/classifiers/hmm/test_gmmhmm.py @@ -1,23 +1,23 @@ -import pytest, warnings, os, numpy as np, hmmlearn.base, hmmlearn.hmm +import pytest, warnings, os, math, numpy as np, hmmlearn.base, hmmlearn.hmm from copy import deepcopy from sequentia.classifiers import GMMHMM, _LeftRightTopology, _ErgodicTopology, _LinearTopology +from sequentia.datasets import load_random_sequences from ....support import assert_equal, assert_not_equal, assert_all_equal, assert_all_not_equal -pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True) +# pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True) # Set seed for reproducible randomness -seed = 0 -np.random.seed(seed) -rng = np.random.RandomState(seed) +random_state = np.random.RandomState(0) # Create some sample data -X = [rng.random((10 * i, 3)) for i in range(1, 4)] -x = rng.random((15, 3)) +dataset = load_random_sequences(15, n_features=2, n_classes=2, length_range=(20, 30), random_state=random_state) +X = [x for x, y in dataset if y == 0] +x = X[0] # Unparameterized HMMs -hmm_lr = GMMHMM(label='c1', n_states=5, topology='left-right', random_state=rng) -hmm_e = GMMHMM(label='c1', n_states=5, topology='ergodic', random_state=rng) -hmm_lin = GMMHMM(label='c1', n_states=5, topology='linear', random_state=rng) +hmm_lr = GMMHMM(label='c1', n_states=5, topology='left-right', random_state=random_state) +hmm_e = GMMHMM(label='c1', n_states=5, topology='ergodic', random_state=random_state) +hmm_lin = GMMHMM(label='c1', n_states=5, topology='linear', random_state=random_state) # ========================================================= # # GMMHMM.set_uniform_initial() + GMMHMM.initial_ (property) # @@ -56,7 +56,7 @@ def test_left_right_random_initial(): hmm = deepcopy(hmm_lr) hmm.set_random_initial() assert_equal(hmm.initial_, np.array([ - 0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597 + 0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092 ])) def test_ergodic_random_initial(): @@ -64,7 +64,7 @@ def test_ergodic_random_initial(): hmm = deepcopy(hmm_e) hmm.set_random_initial() assert_equal(hmm.initial_, np.array([ - 0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597 + 0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092 ])) def test_linear_random_initial(): @@ -72,7 +72,7 @@ def test_linear_random_initial(): hmm = deepcopy(hmm_lin) hmm.set_random_initial() assert_equal(hmm.initial_, np.array([ - 0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597 + 0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092 ])) # ================================================================= # @@ -124,10 +124,10 @@ def test_left_right_random_transitions(): hmm = deepcopy(hmm_lr) hmm.set_random_transitions() assert_equal(hmm.transitions_, np.array([ - [0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597], - [0. , 0.22725263, 0.18611702, 0.56646299, 0.02016736], - [0. , 0. , 0.18542075, 0.44084593, 0.37373332], - [0. , 0. , 0. , 0.65696153, 0.34303847], + [0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092], + [0. , 0.0544546 , 0.167254 , 0.43679272, 0.34149867], + [0. , 0. , 0.02569653, 0.93686415, 0.03743932], + [0. , 0. , 0. , 0.80245882, 0.19754118], [0. , 0. , 0. , 0. , 1. ] ])) @@ -136,11 +136,11 @@ def test_ergodic_random_transitions(): hmm = deepcopy(hmm_e) hmm.set_random_transitions() assert_equal(hmm.transitions_, np.array([ - [0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597], - [0.19252534, 0.15767581, 0.47989976, 0.01708551, 0.15281357], - [0.19375092, 0.16425506, 0.21828034, 0.11397708, 0.30973661], - [0.46906977, 0.02941216, 0.17137502, 0.0333193 , 0.29682374], - [0.21312406, 0.35221103, 0.08556524, 0.06613143, 0.28296824] + [0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092], + [0.05407134, 0.16607684, 0.43371851, 0.33909515, 0.00703816], + [0.12935118, 0.00516918, 0.67173292, 0.16536041, 0.02838631], + [0.15768347, 0.31907791, 0.42873228, 0.06083948, 0.03366686], + [0.42607069, 0.17697038, 0.33288653, 0.04212738, 0.02194502] ])) def test_linear_random_transitions(): @@ -148,10 +148,10 @@ def test_linear_random_transitions(): hmm = deepcopy(hmm_lin) hmm.set_random_transitions() assert_equal(hmm.transitions_, np.array([ - [0.72413873, 0.27586127, 0. , 0. , 0. ], - [0. , 0.07615418, 0.92384582, 0. , 0. ], - [0. , 0. , 0.81752797, 0.18247203, 0. ], - [0. , 0. , 0. , 0.24730529, 0.75269471], + [0.81263954, 0.18736046, 0. , 0. , 0. ], + [0. , 0.30529464, 0.69470536, 0. , 0. ], + [0. , 0. , 0.34435856, 0.65564144, 0. ], + [0. , 0. , 0. , 0.27688918, 0.72311082], [0. , 0. , 0. , 0. , 1. ] ])) @@ -188,7 +188,7 @@ def test_fit_sets_internals(): hmm.set_uniform_initial() hmm.set_uniform_transitions() hmm.fit(X) - assert hmm.n_seqs_ == 3 + assert hmm.n_seqs_ == 8 assert isinstance(hmm.model, hmmlearn.hmm.GMMHMM) def test_left_right_fit_updates_uniform_initial(): @@ -210,7 +210,7 @@ def test_left_right_fit_updates_random_initial(): hmm.set_random_transitions() before = hmm.initial_ assert_equal(before, np.array([ - 0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597 + 0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092 ])) hmm.fit(X) assert_not_equal(before, hmm.initial_) @@ -238,10 +238,10 @@ def test_left_right_fit_updates_random_transitions(): hmm.set_random_transitions() before = hmm.transitions_ assert_equal(before, np.array([ - [0.19252534, 0.15767581, 0.47989976, 0.01708551, 0.15281357], - [0. , 0.28069128, 0.23795997, 0.31622761, 0.16512114], - [0. , 0. , 0.29431489, 0.66404724, 0.04163787], - [0. , 0. , 0. , 0.8372241 , 0.1627759 ], + [0.05407134, 0.16607684, 0.43371851, 0.33909515, 0.00703816], + [0. , 0.13313025, 0.0053202 , 0.69135803, 0.17019152], + [0. , 0. , 0.11443295, 0.29289135, 0.59267569], + [0. , 0. , 0. , 0.87572918, 0.12427082], [0. , 0. , 0. , 0. , 1. ] ])) hmm.fit(X) @@ -266,7 +266,7 @@ def test_ergodic_fit_updates_random_initial(): hmm.set_random_transitions() before = hmm.initial_ assert_equal(before, np.array([ - 0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597 + 0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092 ])) hmm.fit(X) assert_not_equal(before, hmm.initial_) @@ -294,11 +294,12 @@ def test_ergodic_fit_updates_random_transitions(): hmm.set_random_transitions() before = hmm.transitions_ assert_equal(before, np.array([ - [0.19252534, 0.15767581, 0.47989976, 0.01708551, 0.15281357], - [0.19375092, 0.16425506, 0.21828034, 0.11397708, 0.30973661], - [0.46906977, 0.02941216, 0.17137502, 0.0333193 , 0.29682374], - [0.21312406, 0.35221103, 0.08556524, 0.06613143, 0.28296824], - [0.05212313, 0.3345513 , 0.17192948, 0.16379392, 0.27760217]])) + [0.05407134, 0.16607684, 0.43371851, 0.33909515, 0.00703816], + [0.12935118, 0.00516918, 0.67173292, 0.16536041, 0.02838631], + [0.15768347, 0.31907791, 0.42873228, 0.06083948, 0.03366686], + [0.42607069, 0.17697038, 0.33288653, 0.04212738, 0.02194502], + [0.20328414, 0.13729798, 0.03560389, 0.4874536 , 0.13636039] + ])) hmm.fit(X) assert_not_equal(before, hmm.transitions_) @@ -321,7 +322,7 @@ def test_linear_fit_updates_random_initial(): hmm.set_random_transitions() before = hmm.initial_ assert_equal(before, np.array([ - 0.35029635, 0.13344569, 0.02784745, 0.33782453, 0.15058597 + 0.53803322, 0.12404781, 0.07762362, 0.17663443, 0.08366092 ])) hmm.fit(X) assert_not_equal(before, hmm.initial_) @@ -348,11 +349,12 @@ def test_linear_fit_updates_random_transitions(): hmm.set_random_initial() hmm.set_random_transitions() before = hmm.transitions_ + print(repr(before)) assert_equal(before, np.array([ - [0.54975645, 0.45024355, 0. , 0. , 0. ], - [0. , 0.96562169, 0.03437831, 0. , 0. ], - [0. , 0. , 0.29607315, 0.70392685, 0. ], - [0. , 0. , 0. , 0.42938524, 0.57061476], + [0.24561338, 0.75438662, 0. , 0. , 0. ], + [0. , 0.56122003, 0.43877997, 0. , 0. ], + [0. , 0. , 0.02669601, 0.97330399, 0. ], + [0. , 0. , 0. , 0.00763653, 0.99236347], [0. , 0. , 0. , 0. , 1. ] ])) hmm.fit(X) @@ -377,7 +379,7 @@ def test_left_right_forward(): hmm.set_random_initial() hmm.set_random_transitions() hmm.fit(X) - assert isinstance(hmm.forward(x), float) + assert math.isclose(hmm.forward(x), -89.59052551245605) def test_ergodic_forward(): """Forward algorithm on an ergodic HMM""" @@ -385,7 +387,7 @@ def test_ergodic_forward(): hmm.set_random_initial() hmm.set_random_transitions() hmm.fit(X) - assert isinstance(hmm.forward(x), float) + assert math.isclose(hmm.forward(x), -97.67911812603418) def test_linear_forward(): """Forward algorithm on a linear HMM""" @@ -393,7 +395,7 @@ def test_linear_forward(): hmm.set_random_initial() hmm.set_random_transitions() hmm.fit(X) - assert isinstance(hmm.forward(x), float) + assert math.isclose(hmm.forward(x), -90.25666060143605) # =============== # # GMMHMM.freeze() # @@ -574,7 +576,7 @@ def test_n_seqs_with_fit(): hmm.set_random_initial() hmm.set_random_transitions() hmm.fit(X) - assert hmm.n_seqs_ == 3 + assert hmm.n_seqs_ == 8 # ======================== # # GMMHMM.frozen (property) # @@ -653,11 +655,11 @@ def test_means_with_fit(): hmm.set_random_transitions() hmm.fit(X) assert_equal(hmm.means_, np.array([ - [[0.31874666, 0.66724147, 0.13182087]], - [[0.31856896, 0.66741038, 0.13179786]], - [[0.71632403, 0.28939952, 0.18320713]], - [[0.51787902, 0.57561888, 0.5995548 ]], - [[0.66975947, 0.26867588, 0.25477769]] + [[ 0.49517361, 0.79670013]], + [[ 1.81277369, -2.45995611]], + [[-0.61198527, -0.2621587 ]], + [[ 1.40168717, 0.16718235]], + [[-2.05338535, -3.13926956]] ])) # ========================= # @@ -680,21 +682,16 @@ def test_covars_with_fit(): hmm.set_random_transitions() hmm.fit(X) assert_equal(hmm.covars_, np.array([ - [[[ 0.08307002, 0.00160875, 0.0157381 ], - [ 0.00160875, 0.08735411, -0.01063379], - [ 0.0157381 , -0.01063379, 0.08286247]]], - [[[ 0.08307002, 0.00160875, 0.0157381 ], - [ 0.00160875, 0.08735411, -0.01063379], - [ 0.0157381 , -0.01063379, 0.08286247]]], - [[[ 0.08307002, 0.00160875, 0.0157381 ], - [ 0.00160875, 0.08735411, -0.01063379], - [ 0.0157381 , -0.01063379, 0.08286247]]], - [[[ 0.08307002, 0.00160875, 0.0157381 ], - [ 0.00160875, 0.08735411, -0.01063379], - [ 0.0157381 , -0.01063379, 0.08286247]]], - [[[ 0.08307002, 0.00160875, 0.0157381 ], - [ 0.00160875, 0.08735411, -0.01063379], - [ 0.0157381 , -0.01063379, 0.08286247]]] + [[[ 1.38488559, 0.38570541], + [ 0.38570541, 0.63293189]]], + [[[ 1.73706667, 0.28568952], + [ 0.28568952, 0.47176263]]], + [[[ 0.99011246, -0.10938155], + [-0.10938155, 0.01847633]]], + [[[ 1.94877336, -0.17877102], + [-0.17877102, 2.42880862]]], + [[[ 3.52820275, 0.5571457 ], + [ 0.5571457 , 0.2716629 ]]] ])) # ========================== # @@ -726,7 +723,7 @@ def test_transitions_without_setting(): def test_left_right_initial_left_right(): """Set an initial state distribution generated by a left-right topology on a left-right HMM""" hmm = deepcopy(hmm_lr) - topology = _LeftRightTopology(n_states=5, random_state=rng) + topology = _LeftRightTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -734,7 +731,7 @@ def test_left_right_initial_left_right(): def test_left_right_initial_ergodic(): """Set an initial state distribution generated by a left-right topology on an ergodic HMM""" hmm = deepcopy(hmm_lr) - topology = _ErgodicTopology(n_states=5, random_state=rng) + topology = _ErgodicTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -742,7 +739,7 @@ def test_left_right_initial_ergodic(): def test_left_right_initial_linear(): """Set an initial state distribution generated by a left-right topology on an linear HMM""" hmm = deepcopy(hmm_lr) - topology = _LinearTopology(n_states=5, random_state=rng) + topology = _LinearTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -750,7 +747,7 @@ def test_left_right_initial_linear(): def test_ergodic_initial_left_right(): """Set an initial state distribution generated by an ergodic topology on a left-right HMM""" hmm = deepcopy(hmm_e) - topology = _LeftRightTopology(n_states=5, random_state=rng) + topology = _LeftRightTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -758,7 +755,7 @@ def test_ergodic_initial_left_right(): def test_ergodic_initial_ergodic(): """Set an initial state distribution generated by an ergodic topology on an ergodic HMM""" hmm = deepcopy(hmm_e) - topology = _ErgodicTopology(n_states=5, random_state=rng) + topology = _ErgodicTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -766,7 +763,7 @@ def test_ergodic_initial_ergodic(): def test_ergodic_initial_linear(): """Set an initial state distribution generated by an ergodic topology on a linear HMM""" hmm = deepcopy(hmm_e) - topology = _LinearTopology(n_states=5, random_state=rng) + topology = _LinearTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -774,7 +771,7 @@ def test_ergodic_initial_linear(): def test_linear_initial_left_right(): """Set an initial state distribution generated by a linear topology on a left-right HMM""" hmm = deepcopy(hmm_lin) - topology = _LeftRightTopology(n_states=5, random_state=rng) + topology = _LeftRightTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -782,7 +779,7 @@ def test_linear_initial_left_right(): def test_linear_initial_ergodic(): """Set an initial state distribution generated by a linear topology on an ergodic HMM""" hmm = deepcopy(hmm_lin) - topology = _ErgodicTopology(n_states=5, random_state=rng) + topology = _ErgodicTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -790,7 +787,7 @@ def test_linear_initial_ergodic(): def test_linear_initial_linear(): """Set an initial state distribution generated by a linear topology on an linear HMM""" hmm = deepcopy(hmm_lin) - topology = _LinearTopology(n_states=5, random_state=rng) + topology = _LinearTopology(n_states=5, random_state=random_state) initial = topology.random_initial() hmm.initial_ = initial assert_equal(hmm.initial_, initial) @@ -802,7 +799,7 @@ def test_linear_initial_linear(): def test_left_right_transitions_left_right(): """Set a transition matrix generated by a left-right topology on a left-right HMM""" hmm = deepcopy(hmm_lr) - topology = _LeftRightTopology(n_states=5, random_state=rng) + topology = _LeftRightTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() hmm.transitions_ = transitions assert_equal(hmm.transitions_, transitions) @@ -810,7 +807,7 @@ def test_left_right_transitions_left_right(): def test_left_right_transitions_ergodic(): """Set a transition matrix generated by a left-right topology on an ergodic HMM""" hmm = deepcopy(hmm_lr) - topology = _ErgodicTopology(n_states=5, random_state=rng) + topology = _ErgodicTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() with pytest.raises(ValueError) as e: hmm.transitions_ = transitions @@ -819,7 +816,7 @@ def test_left_right_transitions_ergodic(): def test_left_right_transitions_linear(): """Set a transition matrix generated by a left-right topology on a linear HMM""" hmm = deepcopy(hmm_lr) - topology = _LinearTopology(n_states=5, random_state=rng) + topology = _LinearTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() hmm.transitions_ = transitions assert_equal(hmm.transitions_, transitions) @@ -827,7 +824,7 @@ def test_left_right_transitions_linear(): def test_ergodic_transitions_left_right(): """Set a transition matrix generated by an ergodic topology on a left-right HMM""" hmm = deepcopy(hmm_e) - topology = _LeftRightTopology(n_states=5, random_state=rng) + topology = _LeftRightTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() with pytest.warns(UserWarning) as w: hmm.transitions_ = transitions @@ -837,7 +834,7 @@ def test_ergodic_transitions_left_right(): def test_ergodic_transitions_ergodic(): """Set a transition matrix generated by an ergodic topology on an ergodic HMM""" hmm = deepcopy(hmm_e) - topology = _ErgodicTopology(n_states=5, random_state=rng) + topology = _ErgodicTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() hmm.transitions_ = transitions assert_equal(hmm.transitions_, transitions) @@ -845,7 +842,7 @@ def test_ergodic_transitions_ergodic(): def test_ergodic_transitions_linear(): """Set a transition matrix generated by an ergodic topology on a linear HMM""" hmm = deepcopy(hmm_e) - topology = _LinearTopology(n_states=5, random_state=rng) + topology = _LinearTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() with pytest.warns(UserWarning) as w: hmm.transitions_ = transitions @@ -855,7 +852,7 @@ def test_ergodic_transitions_linear(): def test_linear_transitions_left_right(): """Set a transition matrix generated by a linear topology on a left-right HMM""" hmm = deepcopy(hmm_lin) - topology = _LeftRightTopology(n_states=5, random_state=rng) + topology = _LeftRightTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() with pytest.raises(ValueError) as e: hmm.transitions_ = transitions @@ -864,7 +861,7 @@ def test_linear_transitions_left_right(): def test_linear_transitions_ergodic(): """Set a transition matrix generated by a linear topology on an ergodic HMM""" hmm = deepcopy(hmm_lin) - topology = _ErgodicTopology(n_states=5, random_state=rng) + topology = _ErgodicTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() with pytest.raises(ValueError) as e: hmm.transitions_ = transitions @@ -873,7 +870,7 @@ def test_linear_transitions_ergodic(): def test_linear_transitions_linear(): """Set a transition matrix generated by a linear topology on a linear HMM""" hmm = deepcopy(hmm_lin) - topology = _LinearTopology(n_states=5, random_state=rng) + topology = _LinearTopology(n_states=5, random_state=random_state) transitions = topology.random_transitions() hmm.transitions_ = transitions assert_equal(hmm.transitions_, transitions) \ No newline at end of file From 9def4b63440feac3cd8da20237593ab0c3efa8da Mon Sep 17 00:00:00 2001 From: Edwin Onuonga Date: Sun, 26 Jun 2022 17:31:36 +0100 Subject: [PATCH 2/2] Fix HMM tests --- lib/test/lib/classifiers/hmm/test_gmmhmm.py | 2 - .../classifiers/hmm/test_hmm_classifier.py | 163 +++++++++++------- 2 files changed, 105 insertions(+), 60 deletions(-) diff --git a/lib/test/lib/classifiers/hmm/test_gmmhmm.py b/lib/test/lib/classifiers/hmm/test_gmmhmm.py index ec95131..e148337 100644 --- a/lib/test/lib/classifiers/hmm/test_gmmhmm.py +++ b/lib/test/lib/classifiers/hmm/test_gmmhmm.py @@ -4,8 +4,6 @@ from sequentia.datasets import load_random_sequences from ....support import assert_equal, assert_not_equal, assert_all_equal, assert_all_not_equal -# pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True) - # Set seed for reproducible randomness random_state = np.random.RandomState(0) diff --git a/lib/test/lib/classifiers/hmm/test_hmm_classifier.py b/lib/test/lib/classifiers/hmm/test_hmm_classifier.py index 8fba993..72249c1 100644 --- a/lib/test/lib/classifiers/hmm/test_hmm_classifier.py +++ b/lib/test/lib/classifiers/hmm/test_hmm_classifier.py @@ -1,32 +1,29 @@ import os, pickle, pytest, warnings, os, numpy as np, hmmlearn.hmm from copy import deepcopy from sequentia.classifiers import GMMHMM, HMMClassifier, _ErgodicTopology +from sequentia.datasets import load_random_sequences from ....support import assert_equal, assert_not_equal -pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True) +# pytest.skip('Skip until datasets module is added and positive definite issues are fixed', allow_module_level=True) # Set seed for reproducible randomness -seed = 0 -np.random.seed(seed) -rng = np.random.RandomState(seed) +random_state = np.random.RandomState(0) -# Set of possible labels -labels = ['c{}'.format(i) for i in range(5)] +# Create some sample data +dataset = load_random_sequences(50, n_features=2, n_classes=5, length_range=(10, 30), random_state=random_state) +dataset.classes = [f'c{i}' for i in range(5)] +dataset.y = np.array([f'c{i}' for i in dataset.y]) +x, y = dataset[0] -# Create and fit some sample HMMs +# Create and fit some HMMs hmms = [] -for i, label in enumerate(labels): - hmm = GMMHMM(label=label, n_states=(i + 3), random_state=rng) +for sequences, label in dataset.iter_by_class(): + hmm = GMMHMM(label=label, n_states=5, random_state=random_state) hmm.set_random_initial() hmm.set_random_transitions() - hmm.fit([np.arange((i + j * 20) * 30).reshape(-1, 3) for j in range(2, 5)]) + hmm.fit(sequences) hmms.append(hmm) -# Create some sample test data and labels -X = [np.arange((i + 2 * 20) * 30).reshape(-1, 3) for i in range(2, 5)] -Y = ['c0', 'c1', 'c1'] -x, y = X[0], 'c1' - # Fit a classifier hmm_clf = HMMClassifier() hmm_clf.fit(hmms) @@ -62,81 +59,119 @@ def test_fit_list_invalid(): def test_predict_single_frequency_prior(): """Predict a single observation sequence with a frequency prior""" prediction = hmm_clf.predict(x, prior='frequency', return_scores=False, original_labels=False) - assert prediction == 0 + assert prediction == 1 def test_predict_single_uniform_prior(): """Predict a single observation sequence with a uniform prior""" prediction = hmm_clf.predict(x, prior='uniform', return_scores=False, original_labels=False) - assert prediction == 0 + assert prediction == 1 def test_predict_single_custom_prior(): """Predict a single observation sequence with a custom prior""" prediction = hmm_clf.predict(x, prior=([1e-50]*4+[1-4e-50]), return_scores=False, original_labels=False) - assert prediction == 4 + assert prediction == 1 def test_predict_single_return_scores(): """Predict a single observation sequence and return the transformed label, with the un-normalized posterior scores""" prediction = hmm_clf.predict(x, prior='frequency', return_scores=True, original_labels=False) assert isinstance(prediction, tuple) - assert prediction[0] == 0 + assert prediction[0] == 1 assert_equal(prediction[1], np.array([ - -1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844 + -131.46105165, -78.80931343, -99.35179093, -90.89464994, -483.92229446 ])) def test_predict_single_original_labels(): """Predict a single observation sequence and return the original label, without the un-normalized posterior scores""" prediction = hmm_clf.predict(x, prior='uniform', return_scores=False, original_labels=True) - assert prediction == 'c0' + assert prediction == 'c1' def test_predict_single_return_scores_original_labels(): """Predict a single observation sequence and return the original label, with the un-normalized posterior scores""" prediction = hmm_clf.predict(x, prior='frequency', return_scores=True, original_labels=True) assert isinstance(prediction, tuple) - assert prediction[0] == 'c0' + assert prediction[0] == 'c1' assert_equal(prediction[1], np.array([ - -1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844 + -131.46105165, -78.80931343, -99.35179093, -90.89464994, -483.92229446 ])) def test_predict_multiple_frequency_prior(): """Predict multiple observation sequences with a frequency prior""" - predictions = hmm_clf.predict(X, prior='frequency', return_scores=False, original_labels=False) - assert_equal(predictions, np.array([0, 0, 0])) + predictions = hmm_clf.predict(dataset.X, prior='frequency', return_scores=False, original_labels=False) + assert_equal(predictions, np.array([ + 1, 3, 1, 0, 1, 3, 1, 1, 3, 2, 1, 2, 3, 2, 4, 1, 3, 2, 0, 0, 1, 1, + 3, 1, 1, 3, 1, 1, 1, 1, 4, 3, 0, 3, 1, 2, 3, 1, 2, 1, 1, 1, 3, 3, + 2, 3, 1, 1, 4, 1 + ])) def test_predict_multiple_uniform_prior(): """Predict multiple observation sequences with a uniform prior""" - predictions = hmm_clf.predict(X, prior='uniform', return_scores=False, original_labels=False) - assert_equal(predictions, np.array([0, 0, 0])) + predictions = hmm_clf.predict(dataset.X, prior='uniform', return_scores=False, original_labels=False) + assert_equal(predictions, np.array([ + 1, 3, 1, 0, 1, 3, 1, 1, 3, 2, 1, 2, 3, 2, 4, 1, 3, 2, 0, 0, 1, 1, + 3, 1, 1, 3, 1, 1, 1, 1, 4, 3, 0, 3, 1, 2, 3, 1, 2, 1, 1, 1, 3, 3, + 2, 3, 1, 1, 4, 1 + ])) def test_predict_multiple_custom_prior(): """Predict multiple observation sequences with a custom prior""" - predictions = hmm_clf.predict(X, prior=([1-4e-50]+[1e-50]*4), return_scores=False, original_labels=False) - assert_equal(predictions, np.array([0, 0, 0])) + predictions = hmm_clf.predict(dataset.X, prior=([1-4e-50]+[1e-50]*4), return_scores=False, original_labels=False) + assert_equal(predictions, np.array([ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + ])) def test_predict_multiple_return_scores(): """Predict multiple observation sequences and return the transformed labels, with the un-normalized posterior scores""" - predictions = hmm_clf.predict(X, prior='frequency', return_scores=True, original_labels=False) + predictions = hmm_clf.predict(dataset.X, prior='frequency', return_scores=True, original_labels=False) assert isinstance(predictions, tuple) - assert_equal(predictions[0], np.array([0, 0, 0])) - assert_equal(predictions[1], np.array([ - [-1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844], - [-1254.2158035 , -1299.37586652, -1299.75108935, -1255.8359274 , -1308.71071239], - [-1282.57116414, -1330.90436081, -1331.63379359, -1284.79130597, -1342.45717804] + assert_equal(predictions[0], np.array([ + 1, 3, 1, 0, 1, 3, 1, 1, 3, 2, 1, 2, 3, 2, 4, 1, 3, 2, 0, 0, 1, 1, + 3, 1, 1, 3, 1, 1, 1, 1, 4, 3, 0, 3, 1, 2, 3, 1, 2, 1, 1, 1, 3, 3, + 2, 3, 1, 1, 4, 1 + ])) + assert_equal(predictions[1][:5], np.array([ + [-131.46105165, -78.80931343, -99.35179093, -90.89464994, -483.92229446], + [ -91.58935678, -66.6556658 , -91.46883547, -65.69934269, -716.797869 ], + [ -97.5230626 , -74.50878143, -99.1544397 , -76.48361176, -690.2988915 ], + [ 14.24986519, -44.85298283, -41.50143234, -40.50844881, -148.67734234], + [ -95.11368472, -40.81069058, -59.46841129, -52.60034218, -430.36823963] ])) def test_predict_multiple_original_labels(): """Predict multiple observation sequences and return the original labels, without the un-normalized posterior scores""" - predictions = hmm_clf.predict(X, prior='frequency', return_scores=False, original_labels=True) - assert all(np.equal(predictions.astype(object), np.array(['c0', 'c0', 'c0'], dtype=object))) + predictions = hmm_clf.predict(dataset.X, prior='frequency', return_scores=False, original_labels=True) + assert all(np.equal( + predictions.astype(object), + np.array([ + 'c1', 'c3', 'c1', 'c0', 'c1', 'c3', 'c1', 'c1', 'c3', 'c2', 'c1', + 'c2', 'c3', 'c2', 'c4', 'c1', 'c3', 'c2', 'c0', 'c0', 'c1', 'c1', + 'c3', 'c1', 'c1', 'c3', 'c1', 'c1', 'c1', 'c1', 'c4', 'c3', 'c0', + 'c3', 'c1', 'c2', 'c3', 'c1', 'c2', 'c1', 'c1', 'c1', 'c3', 'c3', + 'c2', 'c3', 'c1', 'c1', 'c4', 'c1' + ], dtype=object) + )) def test_predict_multiple_return_scores_original_labels(): """Predict multiple observation sequences and return the original labels, with the un-normalized posterior scores""" - predictions = hmm_clf.predict(X, prior='frequency', return_scores=True, original_labels=True) + predictions = hmm_clf.predict(dataset.X, prior='frequency', return_scores=True, original_labels=True) assert isinstance(predictions, tuple) - assert all(np.equal(predictions[0].astype(object), np.array(['c0', 'c0', 'c0'], dtype=object))) - assert_equal(predictions[1], np.array([ - [-1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844], - [-1254.2158035 , -1299.37586652, -1299.75108935, -1255.8359274 , -1308.71071239], - [-1282.57116414, -1330.90436081, -1331.63379359, -1284.79130597, -1342.45717804] + assert all(np.equal( + predictions[0].astype(object), + np.array([ + 'c1', 'c3', 'c1', 'c0', 'c1', 'c3', 'c1', 'c1', 'c3', 'c2', 'c1', + 'c2', 'c3', 'c2', 'c4', 'c1', 'c3', 'c2', 'c0', 'c0', 'c1', 'c1', + 'c3', 'c1', 'c1', 'c3', 'c1', 'c1', 'c1', 'c1', 'c4', 'c3', 'c0', + 'c3', 'c1', 'c2', 'c3', 'c1', 'c2', 'c1', 'c1', 'c1', 'c3', 'c3', + 'c2', 'c3', 'c1', 'c1', 'c4', 'c1' + ], dtype=object) + )) + assert_equal(predictions[1][:5], np.array([ + [-131.46105165, -78.80931343, -99.35179093, -90.89464994, -483.92229446], + [ -91.58935678, -66.6556658 , -91.46883547, -65.69934269, -716.797869 ], + [ -97.5230626 , -74.50878143, -99.1544397 , -76.48361176, -690.2988915 ], + [ 14.24986519, -44.85298283, -41.50143234, -40.50844881, -148.67734234], + [ -95.11368472, -40.81069058, -59.46841129, -52.60034218, -430.36823963] ])) # ======================== # @@ -145,14 +180,15 @@ def test_predict_multiple_return_scores_original_labels(): def test_evaluate(): """Evaluate performance on some observation sequences and labels""" - acc, cm = hmm_clf.evaluate(X, Y, prior='frequency') - assert acc == 1 / 3 + acc, cm = hmm_clf.evaluate(dataset.X, dataset.y, prior='frequency') + assert acc == 0.92 + print(repr(cm)) assert_equal(cm, np.array([ - [1, 0, 0, 0, 0], - [2, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0] + [ 4, 0, 0, 0, 0], + [ 0, 20, 0, 1, 0], + [ 0, 1, 7, 0, 0], + [ 0, 2, 0, 12, 0], + [ 0, 0, 0, 0, 3] ])) # ==================== # @@ -199,15 +235,26 @@ def test_load_valid(): # Check that all fields are still the same assert isinstance(clf, HMMClassifier) assert all(isinstance(model, GMMHMM) for model in clf.models_) - assert [model.label for model in clf.models_] == labels - assert list(clf.encoder_.classes_) == labels - predictions = clf.predict(X, prior='frequency', return_scores=True, original_labels=True) + assert [model.label for model in clf.models_] == dataset.classes + assert list(clf.encoder_.classes_) == dataset.classes + predictions = clf.predict(dataset.X, prior='frequency', return_scores=True, original_labels=True) assert isinstance(predictions, tuple) - assert all(np.equal(predictions[0].astype(object), np.array(['c0', 'c0', 'c0'], dtype=object))) - assert_equal(predictions[1], np.array([ - [-1225.88304108, -1266.85875999, -1266.96016441, -1226.97939403, -1274.89102844], - [-1254.2158035 , -1299.37586652, -1299.75108935, -1255.8359274 , -1308.71071239], - [-1282.57116414, -1330.90436081, -1331.63379359, -1284.79130597, -1342.45717804] + assert all(np.equal( + predictions[0].astype(object), + np.array([ + 'c1', 'c3', 'c1', 'c0', 'c1', 'c3', 'c1', 'c1', 'c3', 'c2', 'c1', + 'c2', 'c3', 'c2', 'c4', 'c1', 'c3', 'c2', 'c0', 'c0', 'c1', 'c1', + 'c3', 'c1', 'c1', 'c3', 'c1', 'c1', 'c1', 'c1', 'c4', 'c3', 'c0', + 'c3', 'c1', 'c2', 'c3', 'c1', 'c2', 'c1', 'c1', 'c1', 'c3', 'c3', + 'c2', 'c3', 'c1', 'c1', 'c4', 'c1' + ], dtype=object) + )) + assert_equal(predictions[1][:5], np.array([ + [-131.46105165, -78.80931343, -99.35179093, -90.89464994, -483.92229446], + [ -91.58935678, -66.6556658 , -91.46883547, -65.69934269, -716.797869 ], + [ -97.5230626 , -74.50878143, -99.1544397 , -76.48361176, -690.2988915 ], + [ 14.24986519, -44.85298283, -41.50143234, -40.50844881, -148.67734234], + [ -95.11368472, -40.81069058, -59.46841129, -52.60034218, -430.36823963] ])) finally: os.remove('test.pkl') \ No newline at end of file