Add Hinss2021 dataset #580

Merged
merged 22 commits on May 9, 2024
Changes from 10 commits
2 changes: 2 additions & 0 deletions docs/source/whats_new.rst
@@ -28,6 +28,7 @@ Enhancements
- Normalize c-VEP description tables (:gh:`562` :gh:`566` by `Pierre Guetschel`_ and `Bruno Aristimunha`_)
- Update citation in README (:gh:`573` by `Igor Carrara`_)
- Update pyRiemann dependency (:gh:`577` by `Gregoire Cattan`_)
- Add Hinss2021 dataset (:gh:`580` by `Gregoire Cattan`_ and `Yash Chauhan`_)

Bugs
~~~~
@@ -437,3 +438,4 @@ API changes
.. _Jordy Thielen: https://github.com/thijor
.. _Sebastien Velut: https://github.com/swetbear
.. _Brian Irvine: https://github.com/brianjohannes
.. _Yash Chauhan: https://github.com/jiggychauhi
169 changes: 169 additions & 0 deletions examples/plot_Hinss2021_classification.py
@@ -0,0 +1,169 @@
"""
This example uses the Hinss2021 dataset.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


The toy question we will try to answer is:
Which one is better between Xdawn,
electrode selection on time epoch and on covariance,
for EEG classification?

"""

# License: BSD (3-clause)

import warnings

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from pyriemann.channelselection import ElectrodeSelection
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import Xdawn
from pyriemann.tangentspace import TangentSpace
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline

from moabb import set_log_level
from moabb.datasets import Hinss2021
from moabb.evaluations import CrossSessionEvaluation
from moabb.paradigms import RestingStateToP300Adapter


print(__doc__)

##############################################################################
# Silence future and runtime warnings to keep the example output readable
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)

warnings.filterwarnings("ignore")

set_log_level("info")

##############################################################################
# Create a utility transformer
# ----------------------------
#
# Let's create a simple transformer that selects electrodes based on the
# covariance information. A short sanity check on random data follows the
# class definition below.


class EpochSelectChannel(BaseEstimator, TransformerMixin):
    """Select channels based on covariance information."""

def __init__(self, n_chan, est):
self.n_chan = n_chan
self.est = est

def fit(self, X, _y=None, **kwargs):
    # Compute the average covariance matrix over all epochs.
    covs = Covariances(estimator=self.est).fit_transform(X)
    m = np.mean(covs, axis=0)
    n_feats, _ = m.shape
    # Keep track of the `n_chan` largest covariance values.
    all_max = []
    for i in range(n_feats):
        for j in range(n_feats):
            if len(all_max) < self.n_chan:
                all_max.append(m[i, j])
            elif m[i, j] > min(all_max):
                # Replace the smallest stored value with the larger one.
                all_max[np.argmin(all_max)] = m[i, j]
    # Recover the channel indices associated with these values.
    indices = []
    for v in all_max:
        indices.extend(np.argwhere(m == v).flatten())
    self._elec = np.unique(indices)
    return self

def transform(self, X, **kwargs):
    # Keep only the channels selected during fit.
    return X[:, self._elec, :]
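
##############################################################################
# A minimal sanity check of the transformer on random data (an illustrative
# sketch only): the number of selected channels should be close to ``n_chan``.
# The array shape below is arbitrary and unrelated to the Hinss2021 data.

_rng = np.random.default_rng(42)
_X_demo = _rng.standard_normal((10, 16, 128))  # (n_epochs, n_channels, n_samples)
_selector = EpochSelectChannel(n_chan=8, est="lwf")
print("Channels kept on random data:", _selector.fit_transform(_X_demo).shape)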


##############################################################################
# Initialization
# ----------------
#
# 1) Create paradigm
# 2) Load datasets
# 3) Select a few subjects and events


events = dict(easy=2, diff=3)

paradigm = RestingStateToP300Adapter(events=events, tmin=0, tmax=0.5)

datasets = [Hinss2021()]

# reduce the number of subjects.
start_subject = 1
stop_subject = 3
title = "Datasets: "
for dataset in datasets:
title = title + " " + dataset.code
dataset.subject_list = dataset.subject_list[start_subject:stop_subject]
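
##############################################################################
# As an optional sanity check (a sketch only; it triggers the data download),
# one could inspect the epochs and labels produced by the paradigm, e.g.::
#
#     X, y, metadata = paradigm.get_data(
#         dataset=datasets[0], subjects=[datasets[0].subject_list[0]]
#     )
#     print(X.shape, metadata["session"].unique())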

##############################################################################
# Create Pipelines
# ----------------
#
# Pipelines must be a dict mapping a name to a scikit-learn pipeline.

pipelines = {}


pipelines["Xdawn+Cov+TS+LDA"] = make_pipeline(
Xdawn(nfilter=4), Covariances(estimator="lwf"), TangentSpace(), LDA() # 8 components
)

pipelines["Cov+ElSel+TS+LDA"] = make_pipeline(
Covariances(estimator="lwf"), ElectrodeSelection(nelec=8), TangentSpace(), LDA()
)

pipelines["ElSel+Cov+TS+LDA"] = make_pipeline(
EpochSelectChannel(8, "lwf"), Covariances(estimator="lwf"), TangentSpace(), LDA()
)

##############################################################################
# Run evaluation
# ----------------
#
# Compare the pipelines using a cross-session evaluation.

evaluation = CrossSessionEvaluation(
paradigm=paradigm,
datasets=datasets,
overwrite=True,
)

results = evaluation.process(pipelines)

print("Averaging the session performance:")
print(results.groupby("pipeline").mean("score")[["score", "time"]])
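
##############################################################################
# For a finer-grained view, one can also average per session. This is only a
# sketch relying on the ``session`` column of the MOABB results dataframe.

print("Per-session performance:")
print(results.groupby(["pipeline", "session"])["score"].mean())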

##############################################################################
# Plot Results
# ------------
#
# Here we plot the results to compare the three pipelines.

fig, ax = plt.subplots(facecolor="white", figsize=[8, 4])

sns.stripplot(
data=results,
y="score",
x="pipeline",
ax=ax,
jitter=True,
alpha=0.5,
zorder=1,
palette="Set1",
)
sns.pointplot(data=results, y="score", x="pipeline", ax=ax, palette="Set1").set(
title=title
)

ax.set_ylabel("ROC AUC")
ax.set_ylim(0.3, 1)

plt.show()
175 changes: 175 additions & 0 deletions moabb/datasets/Hinss2021.py
@@ -0,0 +1,175 @@
import os
import os.path as osp
import zipfile as z

import mne
import numpy as np

from moabb.datasets import download as dl
from moabb.datasets.base import BaseDataset


URL = "https://zenodo.org/record/5055046/files/"

EVENTS = dict(rs=1, easy=2, medium=3, diff=4)


class Hinss2021(BaseDataset):
"""Neuroergonomic 2021 dataset.

.. admonition:: Dataset summary


=============== ======= ======= ========== ================= ============ =============== ===========
Name            #Subj   #Chan   #Classes   #Blocks/class     Trials len   Sampling rate   #Sessions
=============== ======= ======= ========== ================= ============ =============== ===========
Hinss2021       15      62      4          1                 2s           250Hz           2
=============== ======= ======= ========== ================= ============ =============== ===========

We describe the experimental procedures for a dataset that is publicly available
at https://zenodo.org/records/5055046.
This dataset contains electroencephalographic recordings of 15 subjects (6 female, with an
average age of 25 years). A total of 62 active Ag–AgCl
electrodes were available in the dataset.

The participants took part in three distinct experimental sessions (two of
which are available here), each separated by one week.

At the beginning of each
session, the resting state of the participant (measured as
1 minute with eyes open) was recorded.

Subsequently, participants performed three tasks of varying difficulty
(easy, medium, and difficult). The order of the tasks was randomized.

Notes
-----

.. versionadded:: 1.0.1

References
----------

.. [1] M. Hinss, B. Somon, F. Dehais & R. N. Roy (2021)
Open EEG Datasets for Passive Brain-Computer
Interface Applications: Lacks and Perspectives.
IEEE Neural Engineering Conference.
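
Examples
--------
A minimal usage sketch (assuming the Zenodo files can be downloaded; it only
uses the generic ``BaseDataset`` API)::

    from moabb.datasets import Hinss2021

    dataset = Hinss2021()
    data = dataset.get_data(subjects=[1])
    raw = data[1]["1"]["0"]  # subject 1, session "1", run "0"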
"""

def __init__(self):
super().__init__(
subjects=list(range(1, 16)), # 15 participants
sessions_per_subject=2, # 2 sessions per subject
events=EVENTS,
code="Hinss2021",
interval=[0, 2], # Epochs are 2-second long
paradigm="rstate",
)

def _get_stim_channel(
    self, rs_epochs, easy_epochs, med_epochs, n_epochs, n_samples
):
    """Build the stim channel encoding the condition of each epoch.

    Epochs are concatenated in the order: resting state, easy, medium,
    difficult. A single-sample event is written near the start of each epoch.
    """
    n_epochs_rs = rs_epochs.get_data().shape[0]
    n_epochs_easy = easy_epochs.get_data().shape[0]
    n_epochs_med = med_epochs.get_data().shape[0]
    stim = np.zeros((1, n_epochs * n_samples))
    for i in range(n_epochs):
        if i < n_epochs_rs:
            event = EVENTS["rs"]
        elif i < n_epochs_rs + n_epochs_easy:
            event = EVENTS["easy"]
        elif i < n_epochs_rs + n_epochs_easy + n_epochs_med:
            event = EVENTS["medium"]
        else:
            event = EVENTS["diff"]
        stim[0, n_samples * i + 1] = event
    return stim
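
# Note (sketch): the single-sample events written by ``_get_stim_channel`` can
# be recovered from the stim channel with something like
# ``mne.find_events(raw, stim_channel="stim", shortest_event=1)``;
# ``shortest_event=1`` is needed because each event lasts a single sample.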

def _get_epochs(self, session_path, subject, session, event_file):
    # Build the path to the EEGLAB .set file for this subject/session/task.
    file_path = os.path.join(
        session_path,
        f"alldata_sbj{str(subject).zfill(2)}_sess{session}_{event_file}.set",
    )
    epochs = mne.io.read_epochs_eeglab(file_path)
    return epochs

def _get_single_subject_data(self, subject):
"""Load data for a single subject."""
data = {}

subject_path = self.data_path(subject)[0]

for session in range(1, self.n_sessions + 1):
session_path = os.path.join(subject_path, f"S{session}/eeg/")

# get 'resting state'
rs_epochs = self._get_epochs(session_path, subject, session, "RS")

# get task 'easy'
easy_epochs = self._get_epochs(session_path, subject, session, "MATBeasy")

# get task 'med'
med_epochs = self._get_epochs(session_path, subject, session, "MATBmed")

# get task 'diff'
diff_epochs = self._get_epochs(session_path, subject, session, "MATBdiff")

# concatenate raw data
raw_data = np.concatenate(
(
rs_epochs.get_data(),
easy_epochs.get_data(),
med_epochs.get_data(),
diff_epochs.get_data(),
)
)

# reshape data in the form n_channel x n_sample
raw_data = raw_data.transpose((1, 0, 2))
n_channel, n_epochs, n_samples = raw_data.shape
raw_data = raw_data.reshape((n_channel, n_epochs * n_samples))

# add stim channel
stim = self._get_stim_channel(
rs_epochs, easy_epochs, med_epochs, n_epochs, n_samples
)
raw_data = np.concatenate((raw_data, stim))

# create info
self._chnames = rs_epochs.ch_names + ["stim"]
self._chtypes = ["eeg"] * (raw_data.shape[0] - 1) + ["stim"]

info = mne.create_info(
ch_names=self._chnames, sfreq=500, ch_types=self._chtypes, verbose=False
)
raw = mne.io.RawArray(raw_data, info)

# Only one run => "0"
data[str(session)] = {"0": raw}

return data

def data_path(
    self, subject, path=None, force_update=False, update_path=None, verbose=None
):
    if subject not in self.subject_list:
        raise ValueError("Invalid subject number")

    # download the .zip archive if it is not already in the local cache
    url = f"{URL}P{subject:02}.zip"
    path_zip = dl.data_dl(url, "Neuroergonomics2021")
    path_folder = path_zip[: -len(f"P{subject:02}.zip")]

    # unzip the archive if it has not been extracted yet
    if not osp.isdir(path_folder + f"P{subject:02}"):
        with z.ZipFile(path_zip, "r") as zip_ref:
            zip_ref.extractall(path_folder)

    final_path = f"{path_folder}P{subject:02}"
    return [final_path]
1 change: 1 addition & 0 deletions moabb/datasets/__init__.py
@@ -57,6 +57,7 @@
from .epfl import EPFLP300
from .fake import FakeDataset, FakeVirtualRealityDataset
from .gigadb import Cho2017
from .Hinss2021 import Hinss2021
from .huebner_llp import Huebner2017, Huebner2018
from .Lee2019 import Lee2019_ERP, Lee2019_MI, Lee2019_SSVEP
from .mpi_mi import MunichMI # noqa: F401