MRG: Fixes for 1 channel GDF, millivolt support (mne-tools#6755)

* Fixes and tests header bug for 1 channel GDF. Add tests. Adds millivolt support.
* ENH: More standard org
* Naming [ci skip]
DimitriPapadopoulos · Sep 13, 2019 · 9585e93 · 9585e93
1 parent e99226e
commit 9585e93
Show file tree

Hide file tree

Showing 5 changed files with 42 additions and 22 deletions.
diff --git a/doc/changes/latest.inc b/doc/changes/latest.inc
@@ -96,6 +96,8 @@ Bug
 
 - Fix reading of dates in BrainVision files if no "New Segment" marker is specified, no date is given, or data is missing, by `Stefan Appelhoff`_
 
+- Fix bug with reading one-channel GDF files by `Abram Hindle`_
+
 - Fix bug with y-axis labeling in :meth:`mne.io.Raw.plot_psd` by `Eric Larson`_
 
 - Fix side-effect where :func:`mne.viz.plot_ica_sources` and :meth:`mne.preprocessing.ICA.plot_sources` changed the ``ICA.exclude`` attribute even when users didn't interact with the plot by `Daniel McCloy`_.

diff --git a/doc/changes/names.inc b/doc/changes/names.inc
@@ -253,3 +253,5 @@
 .. _Theodore Papadopoulo: https://github.com/papadop
 
 .. _Milan Rybář: http://milanrybar.cz
+
+.. _Abram Hindle: http://softwareprocess.es
diff --git a/mne/datasets/utils.py b/mne/datasets/utils.py
@@ -237,7 +237,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
     path = _get_path(path, key, name)
     # To update the testing or misc dataset, push commits, then make a new
     # release on GitHub. Then update the "releases" variable:
-    releases = dict(testing='0.70', misc='0.3')
+    releases = dict(testing='0.71', misc='0.3')
     # And also update the "md5_hashes['testing']" variable below.
 
     # To update any other dataset, update the data archive itself (upload
@@ -319,7 +319,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
         sample='fc2d5b9eb0a144b1d6ba84dc3b983602',
         somato='f08f17924e23c57a751b3bed4a05fe02',
         spm='9f43f67150e3b694b523a21eb929ea75',
-        testing='592a922a40406fd40950c825122aa7be',
+        testing='28e420d9c298868f4d537e762823ba5b',
         multimodal='26ec847ae9ab80f58f204d09e2c08367',
         opm='370ad1dcfd5c47e029e692c85358a374',
         visual_92_categories=['74f50bbeb65740903eadc229c9fa759f',

diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py
@@ -308,12 +308,6 @@ def _read_segment_file(data, idx, fi, start, stop, raw_extras, chs, filenames):
     tal_idx = raw_extras.get('tal_idx', [])
     subtype = raw_extras['subtype']
 
-    if np.size(dtype_byte) > 1:
-        if len(np.unique(dtype_byte)) > 1:
-            warn("Multiple data type not supported")
-        dtype = dtype[0]
-        dtype_byte = dtype_byte[0]
-
     # gain constructor
     physical_range = np.array([ch['range'] for ch in chs])
     cal = np.array([ch['cal'] for ch in chs])
@@ -700,21 +694,30 @@ def _read_edf_header(fname, exclude):
     return edf_info, orig_units
 
 
+GDFTYPE_NP = (None, np.int8, np.uint8, np.int16, np.uint16, np.int32,
+              np.uint32, np.int64, np.uint64, None, None, None, None,
+              None, None, None, np.float32, np.float64)
+GDFTYPE_BYTE = tuple(np.dtype(x).itemsize if x is not None else 0
+                     for x in GDFTYPE_NP)
+
+
+def _check_dtype_byte(types):
+    assert sum(GDFTYPE_BYTE) == 42
+    dtype_byte = [GDFTYPE_BYTE[t] for t in types]
+    dtype_np = [GDFTYPE_NP[t] for t in types]
+    if len(np.unique(dtype_byte)) > 1:
+        # We will not read it properly, so this should be an error
+        raise RuntimeError("Reading multiple data types not supported")
+    return dtype_np[0], dtype_byte[0]
+
+
 def _read_gdf_header(fname, exclude):
     """Read GDF 1.x and GDF 2.x header info."""
     edf_info = dict()
     events = None
     with open(fname, 'rb') as fid:
 
         version = fid.read(8).decode()
-
-        gdftype_np = (None, np.int8, np.uint8, np.int16, np.uint16, np.int32,
-                      np.uint32, np.int64, np.uint64, None, None, None, None,
-                      None, None, None, np.float32, np.float64)
-        gdftype_byte = [np.dtype(x).itemsize if x is not None else 0
-                        for x in gdftype_np]
-        assert sum(gdftype_byte) == 42
-
         edf_info['type'] = edf_info['subtype'] = version[:3]
         edf_info['number'] = float(version[4:])
         meas_date = DATE_NONE
@@ -796,16 +799,16 @@ def _read_gdf_header(fname, exclude):
             dtype = np.fromfile(fid, np.int32, len(channels))
 
             # total number of bytes for data
-            bytes_tot = np.sum([gdftype_byte[t] * n_samps[i]
+            bytes_tot = np.sum([GDFTYPE_BYTE[t] * n_samps[i]
                                 for i, t in enumerate(dtype)])
 
             # Populate edf_info
+            dtype_np, dtype_byte = _check_dtype_byte(dtype)
             edf_info.update(
                 bytes_tot=bytes_tot, ch_names=ch_names,
                 data_offset=header_nbytes, digital_min=digital_min,
                 digital_max=digital_max,
-                dtype_byte=[gdftype_byte[t] for t in dtype],
-                dtype_np=[gdftype_np[t] for t in dtype], exclude=exclude,
+                dtype_byte=dtype_byte, dtype_np=dtype_np, exclude=exclude,
                 highpass=highpass, sel=sel, lowpass=lowpass,
                 meas_date=meas_date,
                 meas_id=meas_id, n_records=n_records, n_samps=n_samps,
@@ -964,6 +967,8 @@ def _read_gdf_header(fname, exclude):
             for i, unit in enumerate(units):
                 if unit == 4275:  # microvolts
                     units[i] = 1e-6
+                elif unit == 4274:  # millivolts
+                    units[i] = 1e-3
                 elif unit == 512:  # dimensionless
                     units[i] = 1
                 elif unit == 0:
@@ -1017,15 +1022,15 @@ def _read_gdf_header(fname, exclude):
             assert fid.tell() == header_nbytes
 
             # total number of bytes for data
-            bytes_tot = np.sum([gdftype_byte[t] * n_samps[i]
+            bytes_tot = np.sum([GDFTYPE_BYTE[t] * n_samps[i]
                                 for i, t in enumerate(dtype)])
 
             # Populate edf_info
+            dtype_np, dtype_byte = _check_dtype_byte(dtype)
             edf_info.update(
                 bytes_tot=bytes_tot, ch_names=ch_names,
                 data_offset=header_nbytes,
-                dtype_byte=[gdftype_byte[t] for t in dtype],
-                dtype_np=[gdftype_np[t] for t in dtype],
+                dtype_byte=dtype_byte, dtype_np=dtype_np,
                 digital_min=digital_min, digital_max=digital_max,
                 exclude=exclude, gnd=gnd, highpass=highpass, sel=sel,
                 impedance=impedance, lowpass=lowpass, meas_date=meas_date,

diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py
@@ -5,6 +5,7 @@
 
 import os.path as op
 
+import pytest
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                            assert_equal)
 import numpy as np
@@ -20,6 +21,7 @@
 data_path = testing.data_path(download=False)
 gdf1_path = op.join(data_path, 'GDF', 'test_gdf_1.25')
 gdf2_path = op.join(data_path, 'GDF', 'test_gdf_2.20')
+gdf_1ch_path = op.join(data_path, 'GDF', 'test_1ch.gdf')
 
 
 @testing.requires_testing_data
@@ -83,4 +85,13 @@ def test_gdf2_data():
                      eog=None, misc=None)
 
 
+@testing.requires_testing_data
+def test_one_channel_gdf():
+    """Test a one-channel GDF file."""
+    with pytest.warns(RuntimeWarning, match='different highpass'):
+        ecg = read_raw_gdf(gdf_1ch_path, preload=True)
+    assert ecg['ECG'][0].shape == (1, 4500)
+    assert 150.0 == ecg.info['sfreq']
+
+
 run_tests_if_main()