Add Hinss2021 dataset #580

Merged
merged 22 commits on May 9, 2024
Changes from 10 commits
2 changes: 2 additions & 0 deletions docs/source/whats_new.rst
@@ -28,6 +28,7 @@ Enhancements
- Normalize c-VEP description tables (:gh:`562` :gh:`566` by `Pierre Guetschel`_ and `Bruno Aristimunha`_)
- Update citation in README (:gh:`573` by `Igor Carrara`_)
- Update pyRiemann dependency (:gh:`577` by `Gregoire Cattan`_)
- Add Hinss2021 dataset (:gh:`580` by `Gregoire Cattan`_ and `Yash Chauhan`_)

Bugs
~~~~
@@ -437,3 +438,4 @@ API changes
.. _Jordy Thielen: https://github.com/thijor
.. _Sebastien Velut: https://github.com/swetbear
.. _Brian Irvine: https://github.com/brianjohannes
.. _Yash Chauhan: https://github.com/jiggychauhi
169 changes: 169 additions & 0 deletions examples/plot_Hinss2021_classification.py
@@ -0,0 +1,169 @@
"""
This example uses the Hinss2021 dataset.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


The toy question we will try to answer is:
Which one is better between Xdawn,
electrode selection on time epoch and on covariance,
for EEG classification?

"""

# License: BSD (3-clause)

import warnings

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from pyriemann.channelselection import ElectrodeSelection
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import Xdawn
from pyriemann.tangentspace import TangentSpace
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline

from moabb import set_log_level
from moabb.datasets import Hinss2021
from moabb.evaluations import CrossSessionEvaluation
from moabb.paradigms import RestingStateToP300Adapter


print(__doc__)

##############################################################################
# Silence future and runtime warnings to keep the example output readable
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)

warnings.filterwarnings("ignore")

set_log_level("info")

##############################################################################
# Create a utility transformer
# ----------------------------
#
# Let's create a simple transformer that selects electrodes based on the
# covariance information. A short sanity check on random data follows the
# class definition below.


class EpochSelectChannel(BaseEstimator, TransformerMixin):
    """Select channels based on covariance information."""

def __init__(self, n_chan, est):
self.n_chan = n_chan
self.est = est

def fit(self, X, _y=None, **kwargs):
    # Compute the average covariance matrix over all epochs.
    covs = Covariances(estimator=self.est).fit_transform(X)
    m = np.mean(covs, axis=0)
    n_feats, _ = m.shape
    # Keep track of the `n_chan` largest covariance values.
    all_max = []
    for i in range(n_feats):
        for j in range(n_feats):
            if len(all_max) < self.n_chan:
                all_max.append(m[i, j])
            elif m[i, j] > min(all_max):
                # Replace the smallest stored value with the larger one.
                all_max[np.argmin(all_max)] = m[i, j]
    # Recover the channel indices associated with these values.
    indices = []
    for v in all_max:
        indices.extend(np.argwhere(m == v).flatten())
    self._elec = np.unique(indices)
    return self

def transform(self, X, **kwargs):
    # Keep only the channels selected during fit.
    return X[:, self._elec, :]
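
##############################################################################
# A minimal sanity check of the transformer on random data (an illustrative
# sketch only): the number of selected channels should be close to ``n_chan``.
# The array shape below is arbitrary and unrelated to the Hinss2021 data.

_rng = np.random.default_rng(42)
_X_demo = _rng.standard_normal((10, 16, 128))  # (n_epochs, n_channels, n_samples)
_selector = EpochSelectChannel(n_chan=8, est="lwf")
print("Channels kept on random data:", _selector.fit_transform(_X_demo).shape)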


##############################################################################
# Initialization
# ----------------
#
# 1) Create paradigm
# 2) Load datasets
# 3) Select a few subjects and events


events = dict(easy=2, diff=3)

paradigm = RestingStateToP300Adapter(events=events, tmin=0, tmax=0.5)

datasets = [Hinss2021()]

# reduce the number of subjects.
start_subject = 1
stop_subject = 3
title = "Datasets: "
for dataset in datasets:
title = title + " " + dataset.code
dataset.subject_list = dataset.subject_list[start_subject:stop_subject]
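
##############################################################################
# As an optional sanity check (a sketch only; it triggers the data download),
# one could inspect the epochs and labels produced by the paradigm, e.g.::
#
#     X, y, metadata = paradigm.get_data(
#         dataset=datasets[0], subjects=[datasets[0].subject_list[0]]
#     )
#     print(X.shape, metadata["session"].unique())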

##############################################################################
# Create Pipelines
# ----------------
#
# Pipelines must be a dict mapping a name to a scikit-learn pipeline.

pipelines = {}


pipelines["Xdawn+Cov+TS+LDA"] = make_pipeline(
Xdawn(nfilter=4), Covariances(estimator="lwf"), TangentSpace(), LDA() # 8 components
)

pipelines["Cov+ElSel+TS+LDA"] = make_pipeline(
Covariances(estimator="lwf"), ElectrodeSelection(nelec=8), TangentSpace(), LDA()
)

pipelines["ElSel+Cov+TS+LDA"] = make_pipeline(
EpochSelectChannel(8, "lwf"), Covariances(estimator="lwf"), TangentSpace(), LDA()
)

##############################################################################
# Run evaluation
# ----------------
#
# Compare the pipelines using a cross-session evaluation.

evaluation = CrossSessionEvaluation(
paradigm=paradigm,
datasets=datasets,
overwrite=True,
)

results = evaluation.process(pipelines)

print("Averaging the session performance:")
print(results.groupby("pipeline").mean("score")[["score", "time"]])
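
##############################################################################
# For a finer-grained view, one can also average per session. This is only a
# sketch relying on the ``session`` column of the MOABB results dataframe.

print("Per-session performance:")
print(results.groupby(["pipeline", "session"])["score"].mean())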

##############################################################################
# Plot Results
# ------------
#
# Here we plot the results to compare the three pipelines.

fig, ax = plt.subplots(facecolor="white", figsize=[8, 4])

sns.stripplot(
data=results,
y="score",
x="pipeline",
ax=ax,
jitter=True,
alpha=0.5,
zorder=1,
palette="Set1",
)
sns.pointplot(data=results, y="score", x="pipeline", ax=ax, palette="Set1").set(
title=title
)

ax.set_ylabel("ROC AUC")
ax.set_ylim(0.3, 1)

plt.show()
175 changes: 175 additions & 0 deletions moabb/datasets/Hinss2021.py
@@ -0,0 +1,175 @@
import os
import os.path as osp
import zipfile as z

import mne
import numpy as np

from moabb.datasets import download as dl
from moabb.datasets.base import BaseDataset


URL = "https://zenodo.org/record/5055046/files/"

EVENTS = dict(rs=1, easy=2, medium=3, diff=4)


class Hinss2021(BaseDataset):
"""Neuroergonomic 2021 dataset.

.. admonition:: Dataset summary


=============== ======= ======= ========== ================= ============ =============== ===========
Name            #Subj   #Chan   #Classes   #Blocks/class     Trials len   Sampling rate   #Sessions
=============== ======= ======= ========== ================= ============ =============== ===========
Hinss2021       15      62      4          1                 2s           250Hz           2
=============== ======= ======= ========== ================= ============ =============== ===========

We describe the experimental procedures for a dataset that is publicly available
at https://zenodo.org/records/5055046.
This dataset contains electroencephalographic recordings of 15 subjects (6 female, with an
average age of 25 years). A total of 62 active Ag–AgCl
electrodes were available in the dataset.

The participants took part in three distinct experimental sessions (two of
which are available here), each separated by one week.

At the beginning of each
session, the resting state of the participant (measured as
1 minute with eyes open) was recorded.

Subsequently, participants performed three tasks of varying difficulty
(easy, medium, and difficult). The order of the tasks was randomized.

Notes
-----

.. versionadded:: 1.0.1

References
----------

.. [1] M. Hinss, B. Somon, F. Dehais & R. N. Roy (2021)
Open EEG Datasets for Passive Brain-Computer
Interface Applications: Lacks and Perspectives.
IEEE Neural Engineering Conference.
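
Examples
--------
A minimal usage sketch (assuming the Zenodo files can be downloaded; it only
uses the generic ``BaseDataset`` API)::

    from moabb.datasets import Hinss2021

    dataset = Hinss2021()
    data = dataset.get_data(subjects=[1])
    raw = data[1]["1"]["0"]  # subject 1, session "1", run "0"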
"""

def __init__(self):
super().__init__(
subjects=list(range(1, 16)), # 15 participants
sessions_per_subject=2, # 2 sessions per subject
events=EVENTS,
code="Hinss2021",
interval=[0, 2], # Epochs are 2-second long
paradigm="rstate",
)

def _get_stim_channel(
    self, rs_epochs, easy_epochs, med_epochs, n_epochs, n_samples
):
    """Build the stim channel encoding the condition of each epoch.

    Epochs are concatenated in the order: resting state, easy, medium,
    difficult. A single-sample event is written near the start of each epoch.
    """
    n_epochs_rs = rs_epochs.get_data().shape[0]
    n_epochs_easy = easy_epochs.get_data().shape[0]
    n_epochs_med = med_epochs.get_data().shape[0]
    stim = np.zeros((1, n_epochs * n_samples))
    for i in range(n_epochs):
        if i < n_epochs_rs:
            event = EVENTS["rs"]
        elif i < n_epochs_rs + n_epochs_easy:
            event = EVENTS["easy"]
        elif i < n_epochs_rs + n_epochs_easy + n_epochs_med:
            event = EVENTS["medium"]
        else:
            event = EVENTS["diff"]
        stim[0, n_samples * i + 1] = event
    return stim
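
# Note (sketch): the single-sample events written by ``_get_stim_channel`` can
# be recovered from the stim channel with something like
# ``mne.find_events(raw, stim_channel="stim", shortest_event=1)``;
# ``shortest_event=1`` is needed because each event lasts a single sample.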

def _get_epochs(self, session_path, subject, session, event_file):
    # Build the path to the EEGLAB .set file for this subject/session/task.
    file_path = os.path.join(
        session_path,
        f"alldata_sbj{str(subject).zfill(2)}_sess{session}_{event_file}.set",
    )
    epochs = mne.io.read_epochs_eeglab(file_path)
    return epochs

def _get_single_subject_data(self, subject):
"""Load data for a single subject."""
data = {}

subject_path = self.data_path(subject)[0]

for session in range(1, self.n_sessions + 1):
session_path = os.path.join(subject_path, f"S{session}/eeg/")

# get 'resting state'
rs_epochs = self._get_epochs(session_path, subject, session, "RS")

# get task 'easy'
easy_epochs = self._get_epochs(session_path, subject, session, "MATBeasy")

# get task 'med'
med_epochs = self._get_epochs(session_path, subject, session, "MATBmed")

# get task 'diff'
diff_epochs = self._get_epochs(session_path, subject, session, "MATBdiff")

# concatenate raw data
raw_data = np.concatenate(
(
rs_epochs.get_data(),
easy_epochs.get_data(),
med_epochs.get_data(),
diff_epochs.get_data(),
)
)

# reshape data in the form n_channel x n_sample
raw_data = raw_data.transpose((1, 0, 2))
n_channel, n_epochs, n_samples = raw_data.shape
raw_data = raw_data.reshape((n_channel, n_epochs * n_samples))

# add stim channel
stim = self._get_stim_channel(
rs_epochs, easy_epochs, med_epochs, n_epochs, n_samples
)
raw_data = np.concatenate((raw_data, stim))

# create info
self._chnames = rs_epochs.ch_names + ["stim"]
self._chtypes = ["eeg"] * (raw_data.shape[0] - 1) + ["stim"]

info = mne.create_info(
ch_names=self._chnames, sfreq=500, ch_types=self._chtypes, verbose=False
)
raw = mne.io.RawArray(raw_data, info)

# Only one run => "0"
data[str(session)] = {"0": raw}

return data

def data_path(
    self, subject, path=None, force_update=False, update_path=None, verbose=None
):
    if subject not in self.subject_list:
        raise ValueError("Invalid subject number")

    # download the .zip archive if it is not already in the local cache
    url = f"{URL}P{subject:02}.zip"
    path_zip = dl.data_dl(url, "Neuroergonomics2021")
    path_folder = path_zip[: -len(f"P{subject:02}.zip")]

    # unzip the archive if it has not been extracted yet
    if not osp.isdir(path_folder + f"P{subject:02}"):
        with z.ZipFile(path_zip, "r") as zip_ref:
            zip_ref.extractall(path_folder)

    final_path = f"{path_folder}P{subject:02}"
    return [final_path]
1 change: 1 addition & 0 deletions moabb/datasets/__init__.py
@@ -57,6 +57,7 @@
from .epfl import EPFLP300
from .fake import FakeDataset, FakeVirtualRealityDataset
from .gigadb import Cho2017
from .Hinss2021 import Hinss2021
from .huebner_llp import Huebner2017, Huebner2018
from .Lee2019 import Lee2019_ERP, Lee2019_MI, Lee2019_SSVEP
from .mpi_mi import MunichMI # noqa: F401