diff --git a/docs/conf.py b/docs/conf.py index f0a324c..4ce06e5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,6 +21,7 @@ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.napoleon", + "sphinxcontrib.bibtex", "sphinx_autodoc_typehints", ] @@ -48,7 +49,12 @@ myst_heading_anchors = 2 -# -- Substitutions ----------------------------------------------------------- +# -- Configure sphinxcontrib-bibtex ------------------------------------------- + +bibtex_bibfiles = ['references.bib'] + + +# -- Substitutions ------------------------------------------------------------ commit_hash = ( subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]) diff --git a/docs/getting_started/contributing_code/contributing_code.md b/docs/getting_started/contributing_code/contributing_code.md index 1fea701..2f93a97 100644 --- a/docs/getting_started/contributing_code/contributing_code.md +++ b/docs/getting_started/contributing_code/contributing_code.md @@ -49,24 +49,18 @@ Some relevant conventions in a nutshell: ### Style Guide for docstrings -Please use [documentation strings](https://docs.python.org/3/tutorial/controlflow.html#documentation-strings) to document modules, classes and functions. There exist several different conventions on how to -format these docstrings. We will follow the Google style as described in the +Please use [documentation strings](https://docs.python.org/3/tutorial/controlflow.html#documentation-strings) +to document modules, classes and functions. There exist several different conventions on +how to format these docstrings. We will follow the Google style as described in the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings). - -Please add references to the literature if you are implementing a published algorithm. - -**Example:** +**Example 1:** ``` def func(arg1 : int, arg2 : str) -> bool: """One sentence description of function. Some more details on what the function does. 
- Reference to related work or source material [1] - - [1] Authors et al., Title, Journal, Year - Args: arg1: Description of arg1 arg2: Description of arg2 @@ -77,6 +71,48 @@ Please add references to the literature if you are implementing a published algo return True ``` +**Example 2:** +``` + def func( + arg1 : cdt.NDTimeSeries, + arg2 : cdt.NDTimeSeries, + arg3 : Quantity + ) -> cdt.NDTimeSeries: + """Implements algorithm XY based on :cite:t:`BIBTEXLABEL`. + + Some more details on what the function does. + + Args: + arg1 (:class:`NDTimeSeries`, (channel, wavelength, time)): Description of + first argument. For NDTimeSeries we can specify expected dimensions + like this. + arg2 (:class:`NDTimeSeries`, (time, *)): Some algorithms work only along + a given dimension (e.g. frequency filtering) and are agnostic to any + other dimensions in the array. This should be documented like this. + arg3 (:class:`Quantity`, [time]): Parameters with physical units (e.g. + lengths or time intervals) should be passed as pint.Quantities. The + expected dimensionality should be documented like this. + + Returns: + Description of return value. + """ + return True +``` + +Please add references to the literature if you are implementing a published algorithm. +There is a global bibtex file under `docs/references.bib` to which reference entries +should be added with a unique bibtex label. Refer to a reference entry +with ":cite:t:`BIBTEXLABEL`". Further options are documented in the +[sphinxcontrib-bibtex documentation](https://sphinxcontrib-bibtex.readthedocs.io/en/latest/quickstart.html#minimal-example). + +All references will be listed under [References](../../references.rst). + +Additional [docstring sections](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/index.html#docstring-sections) +may be added, like for example: References, Notes, etc. + + + + ### Where to add my code? 
When contributing code to Cedalion (=package), try to incorporate it into the existing file and folder structure, which also defines Cedalions package and subpackages. Python files (=modules) contain functions of the same category; folders (subpackages) contain python files of the same family. Examples: @@ -136,10 +172,14 @@ from cedalion import Quantity, units @cdc.validate_schemas def function_name(inputVar1: cdt.NDTimeSeries, inputVar2: Quantity): + """What this function does. - """What does this function do?. + Args: + inputVar1: ... + inputVar2: ... - [1] Authors et al., "title", Journal vol., year, doi: + Returns: + description of the return value """ # @@ -157,6 +197,14 @@ The function is wrapped by putting `@cdc.validate_schemas`in front, which will c The following examples are implemented in the [quality.py module](https://github.com/ibs-lab/cedalion/blob/main/src/cedalion/sigproc/quality.py) ### The helper functions + +```{admonition} Update needed +:class: attention + +The code examples are not up to date. Please refer to the +[current source code](https://github.com/ibs-lab/cedalion/blob/main/src/cedalion/sigproc/quality.py) +``` + Now we can create the small helper functions that calculate and check the SNR, Source-Detector Distances and Amplitudes of fNIRS channels. Using the coordinates and units from Xarrays these are effectively implemented: `def snr_range():` @@ -169,7 +217,8 @@ def snr_range(amplitudes: cdt.NDTimeSeries, snr_thresh: Quantity): INPUTS: amplitues: NDTimeSeries, input fNIRS data xarray with time and channel dimensions. snr_thresh: Quantity, SNR threshold (unitless). - If mean(d)/std(d) < SNRthresh then it is excluded as an active channel + If mean(d)/std(d) < SNRthresh + then it is excluded as an active channel OUTPUTS: snr: ratio betwean mean and std of the amplitude signal for all channels. 
MeasList: list of active channels that meet the conditions diff --git a/docs/index.md b/docs/index.md index 83ad108..eaaba35 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,6 +14,7 @@ getting_started/index.md data_structures/index.md examples/index.md api/modules.rst +references.rst ``` This documentation was built from commit {{commit_hash}}. diff --git a/docs/references.bib b/docs/references.bib new file mode 100644 index 0000000..7567bdc --- /dev/null +++ b/docs/references.bib @@ -0,0 +1,41 @@ +@article{Pollonini2014, +title = {Auditory cortex activation to natural speech and simulated cochlear implant speech measured with functional near-infrared spectroscopy}, +journal = {Hearing Research}, +volume = {309}, +pages = {84-93}, +year = {2014}, +issn = {0378-5955}, +doi = {https://doi.org/10.1016/j.heares.2013.11.007}, +author = {Luca Pollonini and Cristen Olds and Homer Abaya and Heather Bortfeld and Michael S. Beauchamp and John S. Oghalai}, +abstract = {The primary goal of most cochlear implant procedures is to improve a patient's ability to discriminate speech. To accomplish this, cochlear implants are programmed so as to maximize speech understanding. However, programming a cochlear implant can be an iterative, labor-intensive process that takes place over months. In this study, we sought to determine whether functional near-infrared spectroscopy (fNIRS), a non-invasive neuroimaging method which is safe to use repeatedly and for extended periods of time, can provide an objective measure of whether a subject is hearing normal speech or distorted speech. We used a 140 channel fNIRS system to measure activation within the auditory cortex in 19 normal hearing subjects while they listed to speech with different levels of intelligibility. Custom software was developed to analyze the data and compute topographic maps from the measured changes in oxyhemoglobin and deoxyhemoglobin concentration. 
Normal speech reliably evoked the strongest responses within the auditory cortex. Distorted speech produced less region-specific cortical activation. Environmental sounds were used as a control, and they produced the least cortical activation. These data collected using fNIRS are consistent with the fMRI literature and thus demonstrate the feasibility of using this technique to objectively detect differences in cortical responses to speech of different intelligibility.} +} + +@article{Huppert2009, +author = {Theodore J. Huppert and Solomon G. Diamond and Maria A. Franceschini and David A. Boas}, +journal = {Appl. Opt.}, +keywords = {Fourier optics and signal processing ; Medical and biological imaging; Spectroscopy; Functional monitoring and imaging ; Absorption coefficient; Brain imaging; Laser sources; Optical absorption; Optical imaging; Tissue optics}, +number = {10}, +pages = {D280--D298}, +publisher = {Optica Publishing Group}, +title = {HomER: a review of time-series analysis methods for near-infrared spectroscopy of the brain}, +volume = {48}, +month = {Apr}, +year = {2009}, +doi = {https://doi.org/10.1364/AO.48.00D280}, +abstract = {Near-infrared spectroscopy (NIRS) is a noninvasive neuroimaging tool for studying evoked hemodynamic changes within the brain. By this technique, changes in the optical absorption of light are recorded over time and are used to estimate the functionally evoked changes in cerebral oxyhemoglobin and deoxyhemoglobin concentrations that result from local cerebral vascular and oxygen metabolic effects during brain activity. Over the past three decades this technology has continued to grow, and today NIRS studies have found many niche applications in the fields of psychology, physiology, and cerebral pathology. 
The growing popularity of this technique is in part associated with a lower cost and increased portability of NIRS equipment when compared with other imaging modalities, such as functional magnetic resonance imaging and positron emission tomography. With this increasing number of applications, new techniques for the processing, analysis, and interpretation of NIRS data are continually being developed. We review some of the time-series and functional analysis techniques that are currently used in NIRS studies, we describe the practical implementation of various signal processing techniques for removing physiological, instrumental, and motion-artifact noise from optical data, and we discuss the unique aspects of NIRS analysis in comparison with other brain imaging modalities. These methods are described within the context of the MATLAB-based graphical user interface program, HomER, which we have developed and distributed to facilitate the processing of optical functional brain data.}, +url = "https://github.com/BUNPC/Homer3", +} + +@article{Oostenveld2001, +title = {The five percent electrode system for high-resolution EEG and ERP measurements}, +journal = {Clinical Neurophysiology}, +volume = {112}, +number = {4}, +pages = {713-719}, +year = {2001}, +issn = {1388-2457}, +doi = {https://doi.org/10.1016/S1388-2457(00)00527-7}, +author = {Robert Oostenveld and Peter Praamstra}, +keywords = {Electrode placement, High resolution EEG, High resolution ERP, Nomenclature}, +abstract = {Objective: A system for electrode placement is described. It is designed for studies on topography and source analysis of spontaneous and evoked EEG activity. Method: The proposed system is based on the extended International 10–20 system which contains 74 electrodes, and extends this system up to 345 electrode locations. 
Results: The positioning and nomenclature of the electrode system is described, and a subset of locations is proposed as especially useful for modern EEG/ERP systems, often having 128 channels available. Conclusion: Similar to the extension of the 10–20 system to the 10–10 system (‘10% system’), proposed in 1985, the goal of this new extension to a 10–5 system is to further promote standardization in high-resolution EEG studies.} +} \ No newline at end of file diff --git a/docs/references.rst b/docs/references.rst new file mode 100644 index 0000000..f6762e2 --- /dev/null +++ b/docs/references.rst @@ -0,0 +1,5 @@ +References +========== + +.. bibliography:: + :all: \ No newline at end of file diff --git a/environment_dev.yml b/environment_dev.yml index d28c382..2c6f546 100644 --- a/environment_dev.yml +++ b/environment_dev.yml @@ -56,3 +56,4 @@ dependencies: - setuptools-scm==7.1.0 - snirf==0.7.4 - pmcx + - sphinxcontrib-bibtex diff --git a/src/cedalion/geometry/landmarks.py b/src/cedalion/geometry/landmarks.py index 1407231..2f8906d 100644 --- a/src/cedalion/geometry/landmarks.py +++ b/src/cedalion/geometry/landmarks.py @@ -109,10 +109,12 @@ def _intersect_mesh_with_triangle( class LandmarksBuilder1010: - """Construct the 10-10-system on a scalp surface. + """Construct the 10-10-system on scalp surface based on :cite:t:`Oostenveld2001`. + + Args: + scalp_surface: a triangle-mesh representing the scalp + landmarks: positions of "Nz", "Iz", "LPA", "RPA" - [1] R. Oostenveld and P. Praamstra, “The five percent electrode system for - high-resolution EEG and ERP measurements,” Clinical Neurophysiology, 2001. 
""" @validate_schemas diff --git a/src/cedalion/sigproc/artifact.py b/src/cedalion/sigproc/artifact.py index 4311f1a..581ff3b 100644 --- a/src/cedalion/sigproc/artifact.py +++ b/src/cedalion/sigproc/artifact.py @@ -9,62 +9,57 @@ @cdc.validate_schemas -def id_motion(fNIRSdata: cdt.NDTimeSeries, t_motion: Quantity, t_mask: Quantity, - stdev_thresh: Quantity, amp_thresh: Quantity): +def id_motion( + fNIRSdata: cdt.NDTimeSeries, + t_motion: Quantity = 0.5 * units.s, + t_mask: Quantity = 1.0 * units.s, + stdev_thresh: float = 50.0, + amp_thresh: float = 5.0, +) -> cdt.NDTimeSeries: """Identify motion artifacts in an fNIRS input data array. If any active data channel exhibits a signal change greater than std_thresh or - amp_thresh, then a segment of data around that time point is marked + amp_thresh, then a segment of data around that time point is marked as a motion artifact. - Based on Homer3 [1] v1.80.2 "hmR_MotionArtifact.m" and "hmR_MotionArtifactByChannel" - Boston University Neurophotonics Center - https://github.com/BUNPC/Homer3 - Translated to cedalion using xarray functionality by AvL, 2024 - - [1] Huppert, T. J., Diamond, S. G., Franceschini, M. A., & Boas, D. A. (2009). - "HomER: a review of time-series analysis methods for near-infrared spectroscopy of - the brain". Appl Opt, 48(10), D280–D298. https://doi.org/10.1364/ao.48.00d280 - - - INPUTS: - amplitudes: NDTimeSeries, input fNIRS data array with at least time and channel - dimensions. Expectation is raw or optical density data. - t_motion: Quantity, time in seconds for motion artifact detection. Checks for - signal change indicative of a motion artifact over time range t_motion. - t_mask: Quantity, time in seconds to mask around motion artifacts. Mark data - over +/- tMask seconds around the identified motion artifact as a - motion artifact. - stdev_thresh: Quantity, threshold for std deviation of signal change. 
If the - signal d for any given active channel changes by more that - stdev_thresh * stdev(d) over the time interval tMotion, then this - time point is marked as a motion artifact. The standard deviation - is determined for each channel independently. Typical value ranges - from 5 to 20. Use a value of 100 or greater if you wish for this - condition to not find motion artifacts. - amp_thresh: Quantity, threshold for amplitude of signal change. If the signal d - for any given active channel changes by more that amp_thresh over - the time interval tMotion, then this time point is marked as a - motion artifact. Typical value ranges from 0.01 to 0.3 for optical - density units. Use a value greater than 100 if you wish for this - condition to not find motion artifacts. - - OUTPUS: - ma_mask: is an xarray that has at least the dimensions channel and time, - units are boolean. At each time point, "false" indicates data - included and "true" indicates motion artifacts. - - DEFAULT PARAMETERS: - t_motion: 0.5 - t_mask: 1.0 - stdev_thresh: 50.0 - amp_thresh: 5.0 - global_flag: False + Args: + fNIRSdata (:class:`NDTimeSeries`, (time, channel, *)): input time series + + t_motion (:class:`Quantity`, [time]): time interval for motion artifact + detection. Checks for signal change indicative of a motion artifact over + time range t_motion. + + + t_mask (:class:`Quantity`, [time]): time range to mask around motion artifacts. + Mark data over +/- t_mask around the identified motion artifact as a + motion artifact. + + stdev_thresh: threshold for std deviation of signal change. If the signal d for + any given active channel changes by more than stdev_thresh * stdev(d) over + the time interval tMotion, then this time point is marked as a motion + artifact. The standard deviation is determined for each channel + independently. Typical value ranges from 5 to 20. Use a value of 100 or + greater if you wish for this condition to not find motion artifacts. 
+ + amp_thresh: threshold for amplitude of signal change. If the signal d for any + given active channel changes by more than amp_thresh over the time interval + t_motion, then this time point is marked as a motion artifact. Typical value + ranges from 0.01 to 0.3 for optical density units. Use a value greater than + 100 if you wish for this condition to not find motion artifacts. + + Returns: + a DataArray that has at least the dimensions channel and time, dtype is boolean. + At each time point, False indicates data included and True indicates motion + artifacts. + + References: + Based on Homer3 v1.80.2 "hmR_MotionArtifact.m" and "hmR_MotionArtifactByChannel" + (:cite:t:`Huppert2009`). """ # TODO assert OD units, otherwise issue a warning - # t_motion in samples rounded to the nearest sample + # t_motion in samples rounded to the nearest sample t_motion_samples = t_motion / fNIRSdata.time.diff(dim="time").mean() t_motion_samples = int(t_motion_samples.round()) # t_mask in samples rounded to the nearest sample @@ -79,11 +74,11 @@ def id_motion(fNIRSdata: cdt.NDTimeSeries, t_motion: Quantity, t_mask: Quantity, # calculate the differences across different time shifts from 1 to t_motion_samples diff = [] - for ii in range(1, t_motion_samples+1): + for ii in range(1, t_motion_samples + 1): # Shift the data by X samples to the left in the 'time' dimension - shifted_data = fNIRSdata.shift(time=-ii,fill_value=0) + shifted_data = fNIRSdata.shift(time=-ii, fill_value=0) # zero padding of original data where shifted data is shorter - fNIRSdata0 =fNIRSdata.copy() + fNIRSdata0 = fNIRSdata.copy() strt_zeroidx = fNIRSdata0.time[-ii] fNIRSdata0.loc[dict(time=slice(strt_zeroidx, None))] = 0 # calc absolute differences @@ -92,75 +87,82 @@ def id_motion(fNIRSdata: cdt.NDTimeSeries, t_motion: Quantity, t_mask: Quantity, # calculate max_diff across all available time delays max_diff = xr.concat(diff, dim="diff").max(dim="diff") - # create mask for artifact indication art_ind = 
xrutils.mask(fNIRSdata, True) # updates mask according to motion correction thresholds mc_thresh and amp_thresh: # sets elements to true if either is exceeded. - art_ind = art_ind.where((max_diff>mc_thresh) | (max_diff>amp_thresh), False) + art_ind = art_ind.where((max_diff > mc_thresh) | (max_diff > amp_thresh), False) # apply mask to data to mask points surrounding motion artifacts # convolution kernel for masking - ckernel = np.ones(2*t_mask_samples + 1) + ckernel = np.ones(2 * t_mask_samples + 1) - # create mask: convolve the artifact indicators with the kernel to mask surrounding samples. - # > 0 makes the result of the convolution again a boolean array - ma_mask = xrutils.convolve(art_ind.astype(int), ckernel, 'time') > 0 + # create mask: convolve the artifact indicators with the kernel to mask surrounding + # samples. > 0 makes the result of the convolution again a boolean array + ma_mask = xrutils.convolve(art_ind.astype(int), ckernel, "time") > 0 return ma_mask - @cdc.validate_schemas def id_motion_refine(ma_mask: cdt.NDTimeSeries, operator: str): """Refines motion artifact mask to simplify and quantify motion artifacts. - INPUTS: - ma_mask: motion artifact mask as generated by id_motion(). - boolean xarray array with at least the dimensions channel and time - operator: string, operation to apply to the mask. Available operators: - "by_channel": collapses the mask along the amplitude/wavelenght/chromophore dimension - to provide a single motion artifact mask per channel (default) over time - "all": collapses the mask along all dimensions to provide a single motion - artifact marker for all channels over time - i.e. an artifact detected in any channel masks all channels. 
- - OUTPUS: - ma_mask_new: updated motion artifact mask, collapsed according to operator - ma_info: pandas dataframe that contains 1) channels with motion artifacts, - 2) # of artifacts detected per channel 3) fraction of artifacts/total time + Args: + ma_mask :class:`NDTimeSeries`, (time, channel, *): motion artifact mask as + generated by id_motion(). + operator: operation to apply to the mask. Available operators: + + - "by_channel": collapses the mask along the amplitude/wavelength/chromo + dimension to provide a single motion artifact mask per channel (default) + over time + - "all": collapses the mask along all dimensions to provide a single motion + artifact marker for all channels over time i.e. an artifact detected in + any channel masks all channels. + + Returns: + A tuple (ma_mask_new, ma_info), where `ma_mask_new` is the updated motion + artifact mask, collapsed according to operator and `ma_info` is a + `pandas.DataFrame` that contains 1) channels with motion artifacts, 2) # of + artifacts detected per channel and 3) fraction of artifacts/total time. """ # combine artifact masks (if multiple masks are provided). 
# Will result in a single mask containing all motion indicators mask = reduce(lambda x, y: x | y, ma_mask) - # collapse motion artifact masks according to operator instruction if operator.lower() == "by_channel": - # find whether "wavelength" or "concentration" exists as a dimension in ma_mask and collapse, otherwise assert an error + # find whether "wavelength" or "concentration" exists as a dimension in ma_mask + # and collapse, otherwise assert an error if "wavelength" in ma_mask.dims: ma_mask_new = ma_mask.any(dim="wavelength") elif "concentration" in ma_mask.dims: ma_mask_new = ma_mask.any(dim="concentration") else: - raise ValueError("ma_mask must have either 'wavelength' or 'concentration' as a dimension") + raise ValueError( + "ma_mask must have either 'wavelength' " + "or 'concentration' as a dimension" + ) ## --- extract motion artifact info --- ## # extract channels that had motion artifacts ch_wma = ma_mask_new.any(dim="time") ch_labels = ch_wma.where(ch_wma, drop=True).channel.values - # for every channel in ch_label calculate the fraction of time points that are true over the total number of time points - ch_frac = (ma_mask_new.sel(channel=ch_labels).sum(dim='time')/ma_mask_new.sizes['time']).to_series() + # for every channel in ch_label calculate the fraction of time points that are + # true over the total number of time points + ch_frac = ( + ma_mask_new.sel(channel=ch_labels).sum(dim="time") + / ma_mask_new.sizes["time"] + ).to_series() # Count number of motion artifacts (transitions in the mask) for each channel - transitions = ma_mask_new.astype(int).diff(dim='time')==1 - transitions_ct = transitions.sum(dim='time').to_series() + transitions = ma_mask_new.astype(int).diff(dim="time") == 1 + transitions_ct = transitions.sum(dim="time").to_series() # Store motion artifact info in a pandas dataframe - ma_info = pd.DataFrame({ - 'ma_fraction': ch_frac, - 'ma_count': transitions_ct - }).reset_index() + ma_info = pd.DataFrame( + {"ma_fraction": 
ch_frac, "ma_count": transitions_ct} + ).reset_index() # collapse mask along all dimensions elif operator.lower() == "all": @@ -168,18 +170,20 @@ def id_motion_refine(ma_mask: cdt.NDTimeSeries, operator: str): ma_mask_new = ma_mask.any(dim=dims2collapse) ## --- extract motion artifact info --- ## - global_frac = (mask.sum(dim='time')/mask.sizes['time']).values + global_frac = (mask.sum(dim="time") / mask.sizes["time"]).values # Count number of motion artifacts (transitions in the mask) for each channel - transitions = mask.astype(int).diff(dim='time')==1 - transitions_ct = transitions.sum(dim='time').values + transitions = mask.astype(int).diff(dim="time") == 1 + transitions_ct = transitions.sum(dim="time").values # Store motion artifact info in a pandas dataframe - ma_info = pd.DataFrame({ - 'channel': ['all channels combined'], - 'ma_fraction': [global_frac], - 'ma_count': [transitions_ct] - }) + ma_info = pd.DataFrame( + { + "channel": ["all channels combined"], + "ma_fraction": [global_frac], + "ma_count": [transitions_ct], + } + ) else: - raise ValueError(f"unsupported operator '{operator}'") + raise ValueError(f"unsupported operator '{operator}'") return ma_mask_new, ma_info diff --git a/src/cedalion/sigproc/frequency.py b/src/cedalion/sigproc/frequency.py index 2da8772..cf6c24e 100644 --- a/src/cedalion/sigproc/frequency.py +++ b/src/cedalion/sigproc/frequency.py @@ -13,7 +13,14 @@ def sampling_rate(timeseries: cdt.NDTimeSeries) -> Quantity: """Estimate the sampling rate of the timeseries. - This functions assumes uniform sampling. + Note: + This functions assumes uniform sampling. + + Args: + timeseries (:class:`NDTimeSeries`, (time,*)): the input time series + + Returns: + The sampling rate estimated by averaging time differences between samples. 
""" assert "units" in timeseries.time.attrs time_unit = units.Unit(timeseries.time.attrs["units"]) @@ -28,8 +35,18 @@ def freq_filter( fmin: Quantity, fmax: Quantity, butter_order=4, -): - """Apply a Butterworth frequency filter.""" +) -> cdt.NDTimeSeries: + """Apply a Butterworth bandpass frequency filter. + + Args: + timeseries (:class:`NDTimeSeries`, (time,*)): the input time series + fmin (:class:`Quantity`, [frequency]): lower threshold of the pass band + fmax (:class:`Quantity`, [frequency]): higher threshold of the pass band + butter_order: order of the filter + + Returns: + The frequency-filtered time series + """ check_dimensionality("fmin", fmin, "[frequency]") check_dimensionality("fax", fmax, "[frequency]") diff --git a/src/cedalion/sigproc/quality.py b/src/cedalion/sigproc/quality.py index fca4417..42aa064 100644 --- a/src/cedalion/sigproc/quality.py +++ b/src/cedalion/sigproc/quality.py @@ -1,33 +1,34 @@ +"""Signal quality metrics and channel pruning.""" + +from functools import reduce +from typing import List + import numpy as np import cedalion.dataclasses as cdc import cedalion.typing as cdt import cedalion.xrutils as xrutils from cedalion import Quantity, units -from typing import List -from functools import reduce +from cedalion.typing import NDTimeSeries from .frequency import freq_filter, sampling_rate @cdc.validate_schemas -def sci(amplitudes: cdt.NDTimeSeries, window_length: Quantity, sci_thresh: Quantity): - """Calculate the scalp-coupling index based on [1]. - - INPUTS: - amplitues: NDTimeSeries, input fNIRS data xarray with time and channel dimensions. - sci_thresh: Quantity, sci threshold (unitless). - If mean(d)/std(d) < SNRthresh then it is excluded as an active channel - OUTPUTS: - sci: xarray with coords from input NDTimeseries containing the scalp-coupling index - sci_mask: boolean mask xarray with coords from sci, true where sci_thresh is met - - - [1] L. Pollonini, C. Olds, H. Abaya, H. Bortfeld, M. S. Beauchamp, and - J. S. 
Oghalai, “Auditory cortex activation to natural speech and - simulated cochlear implant speech measured with functional near-infrared - spectroscopy,” Hearing Research, vol. 309, pp. 84–93, Mar. 2014, doi: - 10.1016/j.heares.2013.11.007. +def sci(amplitudes: NDTimeSeries, window_length: Quantity, sci_thresh: float): + """Calculate the scalp-coupling index based on :cite:t:`Pollonini2014`. + + Args: + amplitudes (:class:`NDTimeSeries`, (channel, wavelength, time)): input time + series + window_length (:class:`Quantity`, [time]): size of the computation window + sci_thresh: if the calculated SCI metric falls below this threshold then the + corresponding time window should be excluded. + + Returns: + A tuple (sci, sci_mask), where sci is a DataArray with coords from the input + NDTimeseries containing the scalp-coupling index. Sci_mask is a boolean mask + DataArray with coords from sci, true where sci_thresh is met. """ assert "wavelength" in amplitudes.dims # FIXME move to validate schema @@ -60,31 +61,23 @@ def sci(amplitudes: cdt.NDTimeSeries, window_length: Quantity, sci_thresh: Quant @cdc.validate_schemas -def snr(amplitudes: cdt.NDTimeSeries, snr_thresh: Quantity): - """Calculates channel SNR of each channel and wavelength. - - Based on Homer3 [1] v1.80.2 "hmR_PruneChannels.m" - Boston University Neurophotonics Center - https://github.com/BUNPC/Homer3 - - [1] Huppert, T. J., Diamond, S. G., Franceschini, M. A., & Boas, D. A. (2009). - "HomER: a review of time-series analysis methods for near-infrared spectroscopy of the brain". - Appl Opt, 48(10), D280–D298. https://doi.org/10.1364/ao.48.00d280 - - INPUTS: - amplitues: NDTimeSeries, input fNIRS data xarray with time and channel dimensions. - snr_thresh: Quantity, SNR threshold (unitless). - If mean(d)/std(d) < SNRthresh then it is excluded as an active channel - OUTPUTS: - snr: xarray with coords from input NDTimeseries containing the ratio between - mean and std of the amplitude signal for all channels. 
- snr_mask: boolean mask xarray with coords from snr, true where snr_threshold is met - - DEFAULT PARAMETERS: - amp_threshs: [1e4, 1e7] - snr_thresh: 2 +def snr(amplitudes: cdt.NDTimeSeries, snr_thresh: float = 2.0): + r"""Calculates signal-to-noise ratio for each channel and other dimension. + + Args: + amplitudes (:class:`NDTimeSeries`, (time, *)): the input time series + snr_thresh: threshold (unitless) below which a channel should be excluded. + Returns: + A tuple (snr, snr_mask) , where snr is a DataArray with coords from input + NDTimeseries containing the ratio between mean and std of the amplitude signal + for all channels. snr_mask is a boolean mask DataArray with the same coords + as snr, true where snr_threshold is met. + + References: + Based on Homer3 v1.80.2 "hmR_PruneChannels.m" (:cite:t:`Huppert2009`) """ + # calculate SNR snr = amplitudes.mean("time") / amplitudes.std("time") # create snr mask and update accoording to snr thresholds @@ -95,94 +88,96 @@ def snr(amplitudes: cdt.NDTimeSeries, snr_thresh: Quantity): @cdc.validate_schemas -def mean_amp(amplitudes: cdt.NDTimeSeries, amp_threshs: Quantity): - """Calculate and threshold channels with mean(amplitudes) < amp_threshs(0) or > amp_threshs(1). - - Based on Homer3 [1] v1.80.2 "hmR_PruneChannels.m" - Boston University Neurophotonics Center - https://github.com/BUNPC/Homer3 - - [1] Huppert, T. J., Diamond, S. G., Franceschini, M. A., & Boas, D. A. (2009). - "HomER: a review of time-series analysis methods for near-infrared spectroscopy of the brain". - Appl Opt, 48(10), D280–D298. https://doi.org/10.1364/ao.48.00d280 - - INPUTS: - amplitudes: NDTimeSeries, input fNIRS data xarray with time and channel dimensions. 
- amp_threshs: Quantity, If mean(amplitudes) < amp_threshs(0) or > amp_threshs(1) - then it is excluded as an active channel in amp_mask - OUTPUTS: - mean_amp: xarray with coords from input NDTimeseries containing the mean amplitudes - amp_mask: boolean mask xarray with coords from mean_amp, true where amp_threshs are met - - DEFAULT PARAMETERS: - amp_threshs: [1e4, 1e7] +def mean_amp(amplitudes: cdt.NDTimeSeries, amp_range: tuple[Quantity, Quantity]): + """Calculate mean amplitudes and mask channels outside amplitude range. + + Args: + amplitudes (:class:`NDTimeSeries`, (time, *)): input time series + amp_range: if amplitudes.mean("time") < amp_threshs[0] or > amp_threshs[1] + then it is excluded as an active channel in amp_mask + Returns: + A tuple (mean_amp, amp_mask), where mean_amp is DataArray with coords from + input NDTimeseries containing the amplitudes averaged over time. The boolean + DataArray amp_mask is true where amp_threshs are met + + References: + Based on Homer3 v1.80.2 "hmR_PruneChannels.m" (:cite:t:`Huppert2009`) """ + # FIXME: default parameters in Homer3 were (1e4, 1e7). Adopt? + # calculate mean amplitude mean_amp = amplitudes.mean("time") # create amplitude mask and update according to amp_range thresholds amp_mask = xrutils.mask(mean_amp, True) - amp_mask = amp_mask.where((mean_amp > amp_threshs[0]) & (mean_amp < amp_threshs[1]), False) + amp_mask = amp_mask.where( + (mean_amp > amp_range[0]) & (mean_amp < amp_range[1]), False + ) return mean_amp, amp_mask @cdc.validate_schemas -def sd_dist(amplitudes: cdt.NDTimeSeries, geo3D: Quantity, sd_threshs: Quantity): - """Calculate and threshold source-detector separations with sd_threshs(1). - - Based on Homer3 [1] v1.80.2 "hmR_PruneChannels.m" - Boston University Neurophotonics Center - https://github.com/BUNPC/Homer3 - - [1] Huppert, T. J., Diamond, S. G., Franceschini, M. A., & Boas, D. A. (2009). 
- "HomER: a review of time-series analysis methods for near-infrared spectroscopy of the brain". - Appl Opt, 48(10), D280–D298. https://doi.org/10.1364/ao.48.00d280 - - INPUTS: - amplitues: NDTimeSeries, input fNIRS data xarray with time and channel dimensions. - geo3D: Quantity, 3D coordinates of the channels - sd_threshs: Quantity, in mm, cm or m. If source-detector separation sd_threshs(1) - then it is excluded as an active channelin sd_mask - OUTPUTS: - sd_dist: xarray with coords from input NDTimeseries containing the channel distances - sd_mask: boolean mask xarray with coords from ch_dists, true where sd_threshs are met - - DEFAULT PARAMETERS: - sd_threshs: [0.0, 4.5] +def sd_dist( + amplitudes: cdt.NDTimeSeries, + geo3D: cdt.LabeledPointCloud, + sd_range: tuple[Quantity, Quantity] = (0 * units.cm, 4.5 * units.cm), +): + """Calculate source-detector separations and mask channels outside a distance range. + + Args: + amplitudes (:class:`NDTimeSeries`, (channel, *)): input time series + geo3D (:class:`LabeledPointCloud`): 3D optode coordinates + sd_range: if source-detector separation < sd_range[0] or > sd_range[1] + then it is excluded as an active channelin sd_mask + + Returns: + A tuple (sd_dist, sd_mask), where sd_dist contains the channel distances + and sd_mask is a boolean `NDTimeSeries`, indicating where distances fall into + sd_range. 
+ + References: + Based on Homer3 v1.80.2 "hmR_PruneChannels.m" (:cite:t:`Huppert2009`) """ # calculate channel distances sd_dist = xrutils.norm( geo3D.loc[amplitudes.source] - geo3D.loc[amplitudes.detector], dim="digitized" - ).round(3) + ).round(3) # create sd_mask and update according to sd_thresh thresholds sd_mask = xrutils.mask(sd_dist, True) - sd_mask = sd_mask.where((sd_dist > sd_threshs[0]) & (sd_dist < sd_threshs[1]), False) + sd_mask = sd_mask.where((sd_dist > sd_range[0]) & (sd_dist < sd_range[1]), False) return sd_dist, sd_mask - @cdc.validate_schemas -def prune_ch(amplitudes: cdt.NDTimeSeries, masks: List[cdt.NDTimeSeries], operator: str): +def prune_ch( + amplitudes: cdt.NDTimeSeries, masks: List[cdt.NDTimeSeries], operator: str +): """Prune channels from the the input data array using quality masks. - INPUTS: - amplitudes: NDTimeSeries, input fNIRS data xarray with time and channel dimensions. - masks: list of boolean mask xarrays, with coordinates that are a subset of amplitudes - operator: string, operators for combination of masks before pruning data_array - "all": = logical AND, keeps a channel only if it is good across all masks/metrics - "any": = logical OR, keeps channel if it is good in any mask/metric + Args: + amplitudes (:class:`NDTimeSeries`): input time series + masks (:class:`List[NDTimeSeries]`) : list of boolean masks with coordinates + comptabile to amplitudes - OUTPUTS: - amplitudes_pruned: input data with channels pruned (dropped) according to quality masks - prune_list: list of pruned channels + operator: operators for combination of masks before pruning data_array + + - "all": logical AND, keeps channel if it is good across all masks + - "any": logical OR, keeps channel if it is good in any mask/metric + + Returns: + A tuple (amplitudes_pruned, prune_list), where amplitudes_pruned is + a the original time series channels pruned (dropped) according to quality masks. + A list of removed channels is returned in prune_list. 
""" # check if all dimensions in the all the masks are also existing in data_array for mask in masks: if not all(dim in amplitudes.dims for dim in mask.dims): - raise ValueError("mask dimensions must be a subset of data_array dimensions") + raise ValueError( + "mask dimensions must be a subset of data_array dimensions" + ) # combine quality masks according to operator instruction if operator.lower() == "all": @@ -198,7 +193,8 @@ def prune_ch(amplitudes: cdt.NDTimeSeries, masks: List[cdt.NDTimeSeries], operat raise ValueError(f"unsupported operator '{operator}'") # apply mask to drop channels - amplitudes, prune_list = xrutils.apply_mask(amplitudes, mask, "drop", dim_collapse="channel") - + amplitudes, prune_list = xrutils.apply_mask( + amplitudes, mask, "drop", dim_collapse="channel" + ) return amplitudes, prune_list diff --git a/src/cedalion/typing.py b/src/cedalion/typing.py index d7ff03f..c987eca 100644 --- a/src/cedalion/typing.py +++ b/src/cedalion/typing.py @@ -1,10 +1,11 @@ +from __future__ import annotations from typing import Annotated, TypeAlias import xarray as xr from cedalion.dataclasses.xrschemas import LabeledPointCloudSchema, NDTimeSeriesSchema -LabeledPointCloud = Annotated[xr.DataArray, LabeledPointCloudSchema] -NDTimeSeries = Annotated[xr.DataArray, NDTimeSeriesSchema] +LabeledPointCloud: TypeAlias = Annotated[xr.DataArray, LabeledPointCloudSchema] +NDTimeSeries: TypeAlias = Annotated[xr.DataArray, NDTimeSeriesSchema] AffineTransform: TypeAlias = xr.DataArray