Skip to content

Commit

Permalink
[MRG][FIX] Fix GDF returning all annotations with same description (m…
Browse files Browse the repository at this point in the history
…ne-tools#5866)

* fix GDF annotations

* sanitizing

* deprecate find_edf_events

* update whatsnew

* TST: find_edf_events deprecation

* wip

* use a simple function and call it when loading the module

* add the gdf_encodes.txt

* Fix gdf test

* Clean-up

* remove one function

* Python is really nice!

* fix nitpicks (+ adding missing file)

* skip test

* missing file to manifest + sdist

* typo + comments

* fix md5
  • Loading branch information
massich authored and agramfort committed Jan 27, 2019
1 parent cd53a27 commit 217aecf
Show file tree
Hide file tree
Showing 9 changed files with 384 additions and 29 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ recursive-include mne/html *.css
recursive-include mne/io/artemis123/resources *

recursive-include mne mne/datasets *.csv
include mne/io/edf/gdf_encodes.txt

### Exclude

Expand Down
4 changes: 4 additions & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ Changelog
Bug
~~~

- Fix :func:`mne.io.read_raw_edf` returning all the annotations with the same name in GDF files by `Joan Massich`_

- Fix :meth:`mne.io.Raw.append` annotations misalignment by `Joan Massich`_

- Fix :func:`mne.io.read_raw_edf` reading duplicate channel names by `Larry Eisenman`_
Expand All @@ -79,6 +81,8 @@ API

- Python 2 is no longer supported; MNE-Python now requires Python 3.5+, by `Eric Larson`_

- Deprecate :func:`mne.io.find_edf_events` by `Joan Massich`_

.. _changes_0_17:

Version 0.17
Expand Down
1 change: 1 addition & 0 deletions mne/datasets/sleep_physionet/tests/test_physionet.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def test_sleep_physionet_age(physionet_tmpdir, mocker):
@requires_good_network
@requires_pandas
@requires_version('xlrd', '0.9')
@pytest.mark.skip(reason="Broken with new pandas 0.24 and xlrd")
def test_run_update_temazepam_records(tmpdir):
"""Test Sleep Physionet URL handling."""
import pandas as pd
Expand Down
33 changes: 33 additions & 0 deletions mne/io/edf/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""Helper functions for EDF, EDF+, BDF converters to FIF."""

# Authors: Teon Brooks <teon.brooks@gmail.com>
# Martin Billinger <martin.billinger@tugraz.at>
# Nicolas Barascud <nicolas.barascud@ens.fr>
# Stefan Appelhoff <stefan.appelhoff@mailbox.org>
# Joan Massich <mailsik@gmail.com>
#
# License: BSD (3-clause)

import re
from ...utils import hashfunc


def _load_gdf_events_lut(fname, md5):
    """Load the GDF event-code lookup table from a tab-separated text file.

    Parameters
    ----------
    fname : str
        Path to the lookup-table file. Lines starting with ``#`` are
        ignored; every other non-blank line holds an event code, a tab,
        and the event name.
    md5 : str
        Expected MD5 hash of ``fname``; it is compared against
        ``hashfunc(fname, hash_type='md5')``.

    Returns
    -------
    lut : dict
        Mapping from integer event code to its cleaned-up name. When a code
        appears more than once, the last entry wins.

    Raises
    ------
    ValueError
        If the MD5 hash of ``fname`` does not match ``md5``.
    """
    if hashfunc(fname, hash_type='md5') != md5:
        raise ValueError("File %s is corrupted. md5 hashes don't match." %
                         fname)

    lut = dict()
    with open(fname, 'r') as fh:
        for line in fh:
            # Skip comments, and blank lines too: a blank line carries no
            # event code and would otherwise make int() raise.
            if line.startswith("#") or not line.strip():
                continue
            code, *name_parts = line.split('\t')
            # Collapse runs of spaces/tabs inside the name, then drop the
            # line terminator.
            name = re.sub('[ \t]+', ' ', ' '.join(name_parts))
            name = re.sub('\n', '', name)
            # Base 0 makes int() honour literal prefixes such as ``0x``.
            lut[int(code, 0)] = name

    return lut
61 changes: 37 additions & 24 deletions mne/io/edf/edf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,26 @@
import re

import numpy as np
import os.path as op

from ...utils import verbose, logger, warn
from ..utils import _blk_read_lims
from ..base import BaseRaw, _check_update_montage
from ..meas_info import _empty_info, _unique_channel_names, DATE_NONE
from ..constants import FIFF
from ...filter import resample
from ...utils import copy_function_doc_to_method_doc
from ...annotations import Annotations
from ...utils import copy_function_doc_to_method_doc, deprecated
from ...annotations import Annotations, events_from_annotations
from ._utils import _load_gdf_events_lut


# Path to the GDF event-code table shipped next to this module, and the
# lookup table built from it. The table is loaded (and checked against the
# hard-coded md5 below) once, when this module is imported.
GDF_EVENT_ENCODES_FILE = op.join(op.dirname(__file__), 'gdf_encodes.txt')
GDF_EVENTS_LUT = _load_gdf_events_lut(fname=GDF_EVENT_ENCODES_FILE,
md5='12134a9be7e0bfa5941e95f8bfd330f7')


@deprecated('find_edf_events is deprecated in 0.18, and will be removed'
' in 0.19. Please use `mne.events_from_annotations` instead')
def find_edf_events(raw):
"""Get original EDF events as read from the header.
Expand Down Expand Up @@ -65,7 +74,7 @@ def find_edf_events(raw):
events : ndarray
The events as they are in the file header.
"""
return raw.find_edf_events()
return events_from_annotations(raw)


class RawEDF(BaseRaw):
Expand Down Expand Up @@ -176,34 +185,19 @@ def __init__(self, input_fname, montage, eog=None, misc=None,
verbose=verbose)

# Read annotations from file and set it
annot = None
onset, duration, desc = list(), list(), list()
ext = os.path.splitext(input_fname)[1][1:].lower()
if ext in ('gdf'):
events = edf_info.get('events', None)
# Annotations in GDF: events are stored as the following
# list: `events = [n_events, pos, typ, chn, dur]` where pos is the
# latency, dur is the duration in samples. They both are
# numpy.ndarray
if events is not None and events[1].shape[0] > 0:
# For whatever reason, typ has the same content as pos
# therefore we set an arbitrary description
desc = 'GDF event'
annot = Annotations(onset=events[1] / self.info['sfreq'],
duration=events[4] / self.info['sfreq'],
description=desc,
orig_time=None)
onset, duration, desc = _get_annotations_gdf(edf_info,
self.info['sfreq'])
elif len(edf_info['tal_idx']) > 0:
# Read TAL data exploiting the header info (no regexp)
tal_data = self._read_segment_file([], [], 0, 0, int(self.n_times),
None, None)
onset, duration, desc = _read_annotations_edf(tal_data[0])

# in EDF, annotations are relative to first_samp
annot = Annotations(onset=onset, duration=duration,
description=desc, orig_time=None)

if annot is not None:
self.set_annotations(annot)
self.set_annotations(Annotations(onset=onset, duration=duration,
description=desc, orig_time=None))

@verbose
def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
Expand Down Expand Up @@ -328,8 +322,10 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
return tal_data

@copy_function_doc_to_method_doc(find_edf_events)
@deprecated('find_edf_events is deprecated in 0.18, and will be removed'
' in 0.19. Please use `mne.events_from_annotations` instead')
def find_edf_events(self):
return self._raw_extras[0]['events']
return events_from_annotations(self)


def _read_ch(fid, subtype, samp, dtype_byte, dtype=None):
Expand Down Expand Up @@ -1223,3 +1219,20 @@ def _get_edf_default_event_id(descriptions):
mapping = dict((a, n) for n, a in
enumerate(sorted(set(descriptions)), start=1))
return mapping


def _get_annotations_gdf(edf_info, sfreq):
onset, duration, desc = list(), list(), list()
events = edf_info.get('events', None)
# Annotations in GDF: events are stored as the following
# list: `events = [n_events, pos, typ, chn, dur]` where pos is the
# latency, dur is the duration in samples. They both are
# numpy.ndarray
if events is not None and events[1].shape[0] > 0:
onset = events[1] / sfreq
duration = events[4] / sfreq
desc = [GDF_EVENTS_LUT[key]
if key in GDF_EVENTS_LUT else 'Unknown'
for key in events[2]]

return onset, duration, desc
Loading

0 comments on commit 217aecf

Please sign in to comment.