Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[add:lib] Add support for dependent feature warping #135

Merged
merged 5 commits into from
Jan 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ The following algorithms provided within Sequentia support the use of multivaria

### Classification algorithms

- [x] Hidden Markov Models (via [`hmmlearn`](https://github.com/hmmlearn/hmmlearn))<br/><em>Learning with the Baum-Welch algorithm [[1]](#references)</em>
- [x] Hidden Markov Models (via [`hmmlearn`](https://github.com/hmmlearn/hmmlearn))<br/><em>Learning with the Baum-Welch algorithm</em> [[1]](#references)
- [x] Gaussian Mixture Model emissions
- [x] Linear, left-right and ergodic topologies
- [x] Dynamic Time Warping k-Nearest Neighbors (via [`dtaidistance`](https://github.com/wannesm/dtaidistance))
- [x] Sakoe–Chiba band global warping constraint
- [x] Feature-independent warping (DTWI)
- [x] Dependent and independent feature warping (DTWD & DTWI)
- [x] Custom distance-weighted predictions
- [x] Multi-processed predictions

Expand Down
24 changes: 19 additions & 5 deletions lib/sequentia/classifiers/knn/knn_classifier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import warnings, tqdm, tqdm.auto, numpy as np, types, pickle, marshal
from joblib import Parallel, delayed
from multiprocessing import cpu_count
from dtaidistance import dtw
from dtaidistance import dtw, dtw_ndim
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from ...internals import _Validator
Expand Down Expand Up @@ -60,6 +60,9 @@ class KNNClassifier:

pip install -vvv --upgrade --no-cache-dir --force-reinstall dtaidistance

independent: bool
Whether or not to allow features to be warped independently from each other. See `here <https://www.cs.ucr.edu/~eamonn/Multi-Dimensional_DTW_Journal.pdf>`_ for a good overview of both approaches.

random_state: numpy.random.RandomState, int, optional
A random state object or seed for reproducible randomness.

Expand All @@ -84,7 +87,7 @@ class KNNClassifier:
The complete set of possible classes/labels.
"""

def __init__(self, k, classes, weighting='uniform', window=1., use_c=False, random_state=None):
def __init__(self, k, classes, weighting='uniform', window=1., use_c=False, independent=False, random_state=None):
self._val = _Validator()
self._k = self._val.restricted_integer(
k, lambda x: x > 0, desc='number of neighbors', expected='greater than zero')
Expand Down Expand Up @@ -116,6 +119,9 @@ def __init__(self, k, classes, weighting='uniform', window=1., use_c=False, rand
warnings.warn('DTAIDistance C library not available – using Python implementation', ImportWarning)
self._use_c = False

self._independent = self._val.boolean(independent, 'independent')
self._dtw = self._dtwi if independent else self._dtwd

def fit(self, X, y):
"""Fits the classifier by adding labeled training observation sequences.

Expand Down Expand Up @@ -238,6 +244,7 @@ def save(self, path):
'weighting': marshal.dumps((self._weighting.__code__, self._weighting.__name__)),
'window': self._window,
'use_c': self._use_c,
'independent': self._independent,
'random_state': self._random_state,
'X': self._X,
'y': self._y,
Expand All @@ -262,7 +269,7 @@ def load(cls, path):
data = pickle.load(file)

# Check deserialized object dictionary and keys
keys = set(('k', 'classes', 'weighting', 'window', 'use_c', 'random_state', 'X', 'y', 'n_features'))
keys = set(('k', 'classes', 'weighting', 'window', 'use_c', 'independent', 'random_state', 'X', 'y', 'n_features'))
if not isinstance(data, dict):
raise TypeError('Expected deserialized object to be a dictionary - make sure the object was serialized with the save() function')
else:
Expand All @@ -280,6 +287,7 @@ def load(cls, path):
weighting=weighting,
window=data['window'],
use_c=data['use_c'],
independent=data['independent'],
random_state=data['random_state']
)

Expand All @@ -293,11 +301,16 @@ def _dtw_1d(self, a, b, window): # Requires fit
"""Computes the DTW distance between two univariate sequences."""
return dtw.distance(a, b, use_c=self._use_c, window=window)

def _dtw(self, A, B): # Requires fit
"""Computes the multivariate DTW distance as the sum of the pairwise per-feature DTW distances."""
def _dtwi(self, A, B): # Requires fit
    """Independent-feature multivariate DTW distance (DTWI).

    Each feature column is warped on its own: the total distance is the
    sum of the per-feature univariate DTW distances between A and B.

    Parameters
    ----------
    A, B: numpy.ndarray (T, n_features)
        Observation sequences to compare.

    Returns
    -------
    distance: float
        Sum of the per-feature DTW distances.
    """
    # Sakoe-Chiba band width scales with the longer sequence (at least 1).
    band = max(1, int(self._window * max(len(A), len(B))))
    per_feature = [self._dtw_1d(A[:, f], B[:, f], window=band) for f in range(self._n_features)]
    return np.sum(per_feature)

def _dtwd(self, A, B): # Requires fit
    """Dependent-feature multivariate DTW distance (DTWD).

    A single warping path is computed over all features at once, using a
    multivariate local distance measure, so the features are warped
    dependently rather than one at a time.

    Parameters
    ----------
    A, B: numpy.ndarray (T, n_features)
        Observation sequences to compare.

    Returns
    -------
    distance: float
        Multivariate DTW distance between A and B.
    """
    # Sakoe-Chiba band width scales with the longer sequence (at least 1).
    longest = max(len(A), len(B))
    band = max(1, int(self._window * longest))
    return dtw_ndim.distance(A, B, use_c=self._use_c, window=band)

def _argmax(self, a):
"""Same as numpy.argmax but returns all occurrences of the maximum, and is O(n) instead of O(2n).
From: https://stackoverflow.com/a/58652335
Expand Down Expand Up @@ -394,6 +407,7 @@ def __repr__(self):
('k', repr(self._k)),
('window', repr(self._window)),
('use_c', repr(self._use_c)),
('independent', repr(self._independent)),
('classes', repr(list(self._encoder.classes_)))
]
try:
Expand Down
45 changes: 36 additions & 9 deletions lib/test/lib/classifiers/knn/test_knn_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
'k=1': KNNClassifier(k=1, classes=classes, random_state=rng),
'k=2': KNNClassifier(k=2, classes=classes, random_state=rng),
'k=3': KNNClassifier(k=3, classes=classes, random_state=rng),
'weighted': KNNClassifier(k=3, classes=classes, weighting=(lambda x: np.exp(-x)), random_state=rng)
'weighted': KNNClassifier(k=3, classes=classes, weighting=(lambda x: np.exp(-x)), random_state=rng),
'independent': KNNClassifier(k=1, classes=classes, independent=True, random_state=rng)
}

for _, clf in clfs.items():
Expand Down Expand Up @@ -96,6 +97,18 @@ def test_predict_single_weighted_no_verbose(capsys):
assert 'Calculating distances' not in capsys.readouterr().err
assert prediction == 'c1'

def test_predict_single_independent_verbose(capsys):
    """Verbosely predict a single observation sequence with independent warping"""
    # verbose=True should emit the distance-calculation progress bar on stderr.
    prediction = clfs['independent'].predict(x, verbose=True)
    captured = capsys.readouterr()
    assert 'Calculating distances' in captured.err
    assert prediction == 'c1'

def test_predict_single_independent_no_verbose(capsys):
    """Silently predict a single observation sequence with independent warping"""
    # Renamed from test_predict_single_k1_no_verbose: the old name tested the
    # 'independent' classifier (see docstring/body) and duplicated the k=1
    # no-verbose test's name, so pytest silently shadowed one of the two.
    prediction = clfs['independent'].predict(x, verbose=False)
    # verbose=False must suppress the distance-calculation progress bar.
    assert 'Calculating distances' not in capsys.readouterr().err
    assert prediction == 'c1'

def test_predict_multiple_k1_verbose(capsys):
"""Verbosely predict multiple observation sequences (k=1)"""
predictions = clfs['k=1'].predict(X, verbose=True)
Expand Down Expand Up @@ -124,25 +137,37 @@ def test_predict_multiple_k3_verbose(capsys):
"""Verbosely predict multiple observation sequences (k=3)"""
predictions = clfs['k=3'].predict(X, verbose=True)
assert 'Classifying examples' in capsys.readouterr().err
assert list(predictions) == ['c1', 'c1', 'c1', 'c0', 'c0', 'c0']
assert list(predictions) == ['c1', 'c1', 'c1', 'c1', 'c0', 'c1']

def test_predict_multiple_k3_no_verbose(capsys):
"""Silently predict multiple observation sequences (k=3)"""
predictions = clfs['k=3'].predict(X, verbose=False)
assert 'Classifying examples' not in capsys.readouterr().err
assert list(predictions) == ['c1', 'c1', 'c1', 'c0', 'c0', 'c0']
assert list(predictions) == ['c1', 'c1', 'c1', 'c1', 'c0', 'c1']

def test_predict_multiple_weighted_verbose(capsys):
"""Verbosely predict multiple observation sequences (weighted)"""
predictions = clfs['weighted'].predict(X, verbose=True)
assert 'Classifying examples' in capsys.readouterr().err
assert list(predictions) == ['c1', 'c1', 'c0', 'c0', 'c0', 'c1']
assert list(predictions) == ['c1', 'c1', 'c0', 'c1', 'c0', 'c1']

def test_predict_multiple_weighted_no_verbose(capsys):
"""Silently predict multiple observation sequences (weighted)"""
predictions = clfs['weighted'].predict(X, verbose=False)
assert 'Classifying examples' not in capsys.readouterr().err
assert list(predictions) == ['c1', 'c1', 'c0', 'c0', 'c0', 'c1']
assert list(predictions) == ['c1', 'c1', 'c0', 'c1', 'c0', 'c1']

def test_predict_multiple_independent_verbose(capsys):
    """Verbosely predict multiple observation sequences with independent warping"""
    # verbose=True should emit the classification progress bar on stderr.
    predictions = clfs['independent'].predict(X, verbose=True)
    captured = capsys.readouterr()
    assert 'Classifying examples' in captured.err
    expected = ['c1', 'c1', 'c0', 'c1', 'c1', 'c0']
    assert list(predictions) == expected

def test_predict_multiple_independent_no_verbose(capsys):
    """Silently predict multiple observation sequences with independent warping"""
    predictions = clfs['independent'].predict(X, verbose=False)
    # verbose=False must suppress the classification progress bar.
    captured = capsys.readouterr()
    assert 'Classifying examples' not in captured.err
    expected = ['c1', 'c1', 'c0', 'c1', 'c1', 'c0']
    assert list(predictions) == expected

def test_predict_single():
"""Predict a single observation sequence and don't return the original labels"""
Expand All @@ -157,12 +182,12 @@ def test_predict_single_original_labels():
def test_predict_multiple():
"""Predict multiple observation sequences and don't return the original labels"""
predictions = clfs['k=3'].predict(X, verbose=False, original_labels=False)
assert list(predictions) == [1, 1, 1, 0, 0, 0]
assert list(predictions) == [1, 1, 1, 1, 0, 1]

def test_predict_multiple_original_labels():
"""Predict multiple observation sequences and return the original labels"""
predictions = clfs['k=3'].predict(X, verbose=False, original_labels=True)
assert list(predictions) == ['c1', 'c1', 'c1', 'c0', 'c0', 'c0']
assert list(predictions) == ['c1', 'c1', 'c1', 'c1', 'c0', 'c1']

# ======================== #
# KNNClassifier.evaluate() #
Expand All @@ -173,8 +198,8 @@ def test_evaluate():
acc, cm = clfs['k=3'].evaluate(X, y)
assert acc == 0.5
assert_equal(cm, np.array([
[1, 1, 0, 0, 0],
[2, 2, 0, 0, 0],
[0, 2, 0, 0, 0],
[1, 3, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]
Expand Down Expand Up @@ -249,6 +274,7 @@ def test_load_valid_no_weighting():
assert list(clf._encoder.classes_) == classes
assert clf._window == 1.
assert clf._use_c == False
assert clf._independent == False
assert deepcopy(clf._random_state).normal() == deepcopy(rng).normal()
assert_all_equal(clf._X, X)
assert_equal(clf._y, clf._encoder.transform(y))
Expand All @@ -271,6 +297,7 @@ def test_load_valid_weighting():
assert list(clf._encoder.classes_) == classes
assert clf._window == 1.
assert clf._use_c == False
assert clf._independent == False
assert deepcopy(clf._random_state).normal() == deepcopy(rng).normal()
assert_all_equal(clf._X, X)
assert_equal(clf._y, clf._encoder.transform(y))
Expand Down
Loading