From 71d57225ed377c67627f0aadb68fc215bd58017c Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Mon, 29 May 2023 00:21:19 +0200 Subject: [PATCH 01/30] Updating README.md --- docs/source/README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/source/README.md b/docs/source/README.md index b90af6b20..e7536f123 100644 --- a/docs/source/README.md +++ b/docs/source/README.md @@ -146,6 +146,20 @@ fetch(endpoint)
+#### Special acknowledgment for the extra MOABB contributors: + + + + + + + + + + + +
Pedro Rodrigues
 Pedro L. C. Rodrigues
+ ### What do we need? **You**! In whatever way you can help. From 5defea69faa57b07243ea9c6ebe796dce054ace4 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Mon, 19 Jun 2023 19:33:18 +0200 Subject: [PATCH 02/30] Adding new saving --- docs/source/whats_new.rst | 1 + moabb/evaluations/utils.py | 109 +++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 moabb/evaluations/utils.py diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index e9c1d25e9..32d3b5110 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -24,6 +24,7 @@ Enhancements - Fixing the dataset description based on the meta information (:gh:`389` and `398` by `Bruno Aristimunha`_ and `Sara Sedlar`_) - Adding second deployment of the documentation (:gh:`374` by `Bruno Aristimunha`_) - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) +- Adding saving option for the models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) Bugs ~~~~ diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py new file mode 100644 index 000000000..aa84df0be --- /dev/null +++ b/moabb/evaluations/utils.py @@ -0,0 +1,109 @@ +from os import makedirs +from pathlib import Path + +from joblib import dump + + +def save_model(model, save_path: str, cv_index: int): + """ + Save a model fitted to a folder + Parameters + ---------- + model: object + Model (pipeline) fitted + save_path: str + Path to save the model, will create if it does not exist + based on the parameter hdf5_path from the evaluation object. 
+ cv_index: int + Index of the cross-validation fold used to fit the model + Returns + ------- + filenames: list + List of filenames where the model is saved + """ + # Save the model + makedirs(save_path, exist_ok=True) + return dump(model, Path(save_path) / f"fitted_model_{cv_index}.pkl") + + +def save_model_list(model_list: list, save_path: str): + """ + Save a list of models fitted to a folder + Parameters + ---------- + model_list: list + List of models (pipelines) fitted + save_path: str + Path to save the models, will create if it does not exist + based on the parameter hdf5_path from the evaluation object. + Returns + ------- + """ + # Save the result + makedirs(save_path, exist_ok=True) + for i, model in enumerate(model_list): + dump( + model, + Path(save_path) / f"fitted_model_cv_{str(i)}.pkl", + ) + + +def create_save_path( + hdf5_path: str, + code: str, + subject: int, + session: int, + name: str, + grid=False, + eval_type="WithinSession", +): + """ + Create a save path based on evaluation parameters. + + Parameters + ---------- + hdf5_path : str + The base path where the models will be saved. + code : str + The code for the evaluation. + subject : int + The subject ID for the evaluation. + session : str + The session ID for the evaluation. + name : str + The name for the evaluation. + grid : bool, optional + Whether the evaluation is a grid search or not. Defaults to False. + eval_type : str, optional + The type of evaluation, either 'WithinSession', 'CrossSession' or 'CrossSubject'. + Defaults to WithinSession. + + Returns + ------- + path_save: str + The created save path. 
+ """ + + if eval_type != "WithinSession": + session = "" + + if grid: + path_save = ( + Path(hdf5_path) + / f"GridSearch{eval_type}" + / code + / f"subject{str(subject)}" + / str(session) + / str(name) + ) + else: + path_save = ( + Path(hdf5_path) + / f"Models{eval_type}" + / code + / f"subject{str(subject)}" + / str(session) + / str(name) + ) + + return str(path_save) From a3d7293c64b6b92ab36bdea5dbc9d184d770319b Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Mon, 19 Jun 2023 19:33:42 +0200 Subject: [PATCH 03/30] Adding new saving model --- moabb/evaluations/evaluations.py | 91 ++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 21 deletions(-) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index 4f7230fa7..c2663cc6e 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -16,12 +16,14 @@ StratifiedKFold, StratifiedShuffleSplit, cross_val_score, + cross_validate, ) from sklearn.model_selection._validation import _fit_and_score, _score from sklearn.preprocessing import LabelEncoder from tqdm import tqdm from moabb.evaluations.base import BaseEvaluation +from moabb.evaluations.utils import create_save_path, save_model, save_model_list try: @@ -83,7 +85,7 @@ class WithinSessionEvaluation(BaseEvaluation): suffix: str Suffix for the results file. hdf5_path: str - Specific path for storing the results. + Specific path for storing the results and models. additional_columns: None Adding information to results. 
return_epochs: bool, default=False @@ -220,13 +222,15 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): grid_clf = clone(clf) - name_grid = os.path.join( - str(self.hdf5_path), - "GridSearch_WithinSession", + # Create folder for grid search results + name_grid = create_save_path( + self.hdf5_path, dataset.code, - "subject" + str(subject), - str(session), - str(name), + subject, + session, + name, + grid=True, + eval_type="WithinSession", ) # Implement Grid Search @@ -244,8 +248,9 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): cvclf.fit(X_[train], y_[train]) acc.append(scorer(cvclf, X_[test], y_[test])) acc = np.array(acc) + score = acc.mean() else: - acc = cross_val_score( + results = cross_validate( grid_clf, X[ix], y_cv, @@ -253,13 +258,27 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): scoring=self.paradigm.scoring, n_jobs=self.n_jobs, error_score=self.error_score, + return_estimator=True, ) - score = acc.mean() + score = results["test_score"].mean() if _carbonfootprint: emissions = tracker.stop() if emissions is None: emissions = np.NaN duration = time() - t_start + + model_save_path = create_save_path( + self.hdf5_path, + dataset.code, + subject, + session, + name, + grid=False, + eval_type="WithinSession", + ) + + save_model_list(results["estimator"], save_path=model_save_path) + nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { "time": duration / 5.0, # 5 fold CV @@ -541,12 +560,14 @@ def process_subject(self, subject, param_grid, pipelines, dataset): grid_clf = clone(clf) # Load result if the folder exist - name_grid = os.path.join( - str(self.hdf5_path), - "GridSearch_CrossSession", - dataset.code, - str(subject), - name, + name_grid = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=True, + eval_type="CrossSession", ) # Implement Grid Search @@ -560,12 +581,14 @@ def 
process_subject(self, subject, param_grid, pipelines, dataset): emissions_grid = 0 for train, test in cv.split(X, y, groups): + model_list = [] if _carbonfootprint: tracker.start() t_start = time() if isinstance(X, BaseEpochs): cvclf = clone(grid_clf) cvclf.fit(X[train], y[train]) + model_list.append(cvclf) score = scorer(cvclf, X[test], y[test]) else: result = _fit_and_score( @@ -579,14 +602,28 @@ def process_subject(self, subject, param_grid, pipelines, dataset): parameters=None, fit_params=None, error_score=self.error_score, + return_estimator=True, ) score = result["test_scores"] + model_list = result["estimator"] if _carbonfootprint: emissions = tracker.stop() if emissions is None: emissions = 0 duration = time() - t_start + model_save_path = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=False, + eval_type="CrossSession", + ) + + save_model_list(model_list=model_list, save_path=model_save_path) + nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { "time": duration, @@ -725,7 +762,7 @@ def evaluate(self, dataset, pipelines, param_grid): if _carbonfootprint: tracker.start() name_grid = os.path.join( - str(self.hdf5_path), "GridSearch_CrossSubject", dataset.code, name + str(self.hdf5_path), "GridSearchCrossSubject", dataset.code, name ) pipelines[name] = self._grid_search( @@ -738,15 +775,16 @@ def evaluate(self, dataset, pipelines, param_grid): emissions_grid[name] = 0 # Progressbar at subject level - for train, test in tqdm( - cv.split(X, y, groups), - total=n_subjects, - desc=f"{dataset.code}-CrossSubject", + for cv_ind, (train, test) in enumerate( + tqdm( + cv.split(X, y, groups), + total=n_subjects, + desc=f"{dataset.code}-CrossSubject", + ) ): subject = groups[test[0]] # now we can check if this subject has results run_pipes = self.results.not_yet_computed(pipelines, dataset, subject) - # iterate over pipelines for name, clf in run_pipes.items(): if 
_carbonfootprint: @@ -759,6 +797,17 @@ def evaluate(self, dataset, pipelines, param_grid): emissions = 0 duration = time() - t_start + model_save_path = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=False, + eval_type="CrossSubject", + ) + + save_model(model=model, save_path=model_save_path, cv_index=cv_ind) # we eval on each session for session in np.unique(sessions[test]): ix = sessions[test] == session From 432e1285eb54aaeee5844daedddb63e5abc13715 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Mon, 19 Jun 2023 19:34:01 +0200 Subject: [PATCH 04/30] Adding new functions --- moabb/evaluations/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/moabb/evaluations/__init__.py b/moabb/evaluations/__init__.py index a48a6fd82..b2af402e9 100644 --- a/moabb/evaluations/__init__.py +++ b/moabb/evaluations/__init__.py @@ -10,3 +10,4 @@ CrossSubjectEvaluation, WithinSessionEvaluation, ) +from .utils import create_save_path, save_model, save_model_list From c40cff84ab7d15d8d8af29b9a38d5f34b358c28a Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Mon, 19 Jun 2023 19:37:04 +0200 Subject: [PATCH 05/30] updating the models doc --- moabb/evaluations/evaluations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index c2663cc6e..4cef76a18 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -467,7 +467,7 @@ class CrossSessionEvaluation(BaseEvaluation): suffix: str Suffix for the results file. hdf5_path: str - Specific path for storing the results. + Specific path for storing the results and models. additional_columns: None Adding information to results. return_epochs: bool, default=False @@ -673,7 +673,7 @@ class CrossSubjectEvaluation(BaseEvaluation): suffix: str Suffix for the results file. hdf5_path: str - Specific path for storing the results. 
+ Specific path for storing the results and models. additional_columns: None Adding information to results. return_epochs: bool, default=False From 1cac452bca15943a6187c7dbd312ee7de80a4a34 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Mon, 19 Jun 2023 20:52:58 +0200 Subject: [PATCH 06/30] updating the models and evaluations --- moabb/evaluations/utils.py | 10 +++++----- moabb/tests/evaluations.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index aa84df0be..6403fa1ac 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -52,7 +52,7 @@ def create_save_path( hdf5_path: str, code: str, subject: int, - session: int, + session: str, name: str, grid=False, eval_type="WithinSession", @@ -90,18 +90,18 @@ def create_save_path( if grid: path_save = ( Path(hdf5_path) - / f"GridSearch{eval_type}" + / f"GridSearch_{eval_type}" / code - / f"subject{str(subject)}" + / f"{str(subject)}" / str(session) / str(name) ) else: path_save = ( Path(hdf5_path) - / f"Models{eval_type}" + / f"Models_{eval_type}" / code - / f"subject{str(subject)}" + / f"{str(subject)}" / str(session) / str(name) ) diff --git a/moabb/tests/evaluations.py b/moabb/tests/evaluations.py index d08e53b09..dab7fc230 100644 --- a/moabb/tests/evaluations.py +++ b/moabb/tests/evaluations.py @@ -81,7 +81,7 @@ def test_eval_grid_search(self): "res_test", "GridSearch_WithinSession", str(dataset.code), - "subject1", + "1", "session_0", "C", "Grid_Search_WithinSession.pkl", From 6697c88615da6d4e1da6b2dc0268ddb0f7768716 Mon Sep 17 00:00:00 2001 From: Bru Date: Mon, 19 Jun 2023 22:38:29 +0100 Subject: [PATCH 07/30] Update moabb/evaluations/evaluations.py --- moabb/evaluations/evaluations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index 4cef76a18..1eea80358 100644 --- a/moabb/evaluations/evaluations.py +++ 
b/moabb/evaluations/evaluations.py @@ -762,7 +762,7 @@ def evaluate(self, dataset, pipelines, param_grid): if _carbonfootprint: tracker.start() name_grid = os.path.join( - str(self.hdf5_path), "GridSearchCrossSubject", dataset.code, name + str(self.hdf5_path), "GridSearch_CrossSubject", dataset.code, name ) pipelines[name] = self._grid_search( From bd034558215e36009fde72926907e63a4e138b25 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 00:20:15 +0200 Subject: [PATCH 08/30] Generatic type --- moabb/tests/evaluations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moabb/tests/evaluations.py b/moabb/tests/evaluations.py index dab7fc230..d1bb782ed 100644 --- a/moabb/tests/evaluations.py +++ b/moabb/tests/evaluations.py @@ -237,7 +237,7 @@ def tearDown(self): os.remove(path) def test_fails_if_nothing_returned(self): - self.assertRaises(ValueError, self.eval.process, pipelines) + self.assertRaises(Exception, self.eval.process, pipelines) # TODO Add custom evaluation that actually returns additional info From 3cbc1d0ca43aa7eef259fa330a09490a7381c4f0 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 09:34:22 +0200 Subject: [PATCH 09/30] adding if --- moabb/evaluations/evaluations.py | 63 +++++++++++++++++--------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index 1eea80358..87f3d41ef 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -267,17 +267,18 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): emissions = np.NaN duration = time() - t_start - model_save_path = create_save_path( - self.hdf5_path, - dataset.code, - subject, - session, - name, - grid=False, - eval_type="WithinSession", - ) + if self.hdf5_path is not None: + model_save_path = create_save_path( + self.hdf5_path, + dataset.code, + subject, + session, + name, + grid=False, + 
eval_type="WithinSession", + ) - save_model_list(results["estimator"], save_path=model_save_path) + save_model_list(results["estimator"], save_path=model_save_path) nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { @@ -612,17 +613,18 @@ def process_subject(self, subject, param_grid, pipelines, dataset): emissions = 0 duration = time() - t_start - model_save_path = create_save_path( - hdf5_path=self.hdf5_path, - code=dataset.code, - subject=subject, - session="", - name=name, - grid=False, - eval_type="CrossSession", - ) + if self.hdf5_path is not None: + model_save_path = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=False, + eval_type="CrossSession", + ) - save_model_list(model_list=model_list, save_path=model_save_path) + save_model_list(model_list=model_list, save_path=model_save_path) nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { @@ -797,17 +799,18 @@ def evaluate(self, dataset, pipelines, param_grid): emissions = 0 duration = time() - t_start - model_save_path = create_save_path( - hdf5_path=self.hdf5_path, - code=dataset.code, - subject=subject, - session="", - name=name, - grid=False, - eval_type="CrossSubject", - ) + if self.hdf5_path is not None: + model_save_path = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=False, + eval_type="CrossSubject", + ) - save_model(model=model, save_path=model_save_path, cv_index=cv_ind) + save_model(model=model, save_path=model_save_path, cv_index=cv_ind) # we eval on each session for session in np.unique(sessions[test]): ix = sessions[test] == session From c0ba05ddac959aded3bb596fafe8417a76fe117c Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 11:55:32 +0200 Subject: [PATCH 10/30] Adding saving the best and changing the saving --- moabb/evaluations/evaluations.py | 65 +++++++++++++++++++------------- 
moabb/evaluations/utils.py | 10 ++++- 2 files changed, 47 insertions(+), 28 deletions(-) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index 87f3d41ef..882490182 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -180,11 +180,9 @@ def _grid_search(self, param_grid, name_grid, name, grid_clf, X_, y_, cv): # flake8: noqa: C901 def _evaluate(self, dataset, pipelines, param_grid): - results = Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( + results = Parallel(n_jobs=self.n_jobs_evaluation, verbose=1, backend="loky")( delayed(self._evaluate_subject)(dataset, pipelines, param_grid, subject) - for subject in tqdm( - dataset.subject_list, desc=f"{dataset.code}-WithinSession" - ) + for subject in dataset.subject_list ) # Concatenate the results from all subjects @@ -237,16 +235,28 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): grid_clf = self._grid_search( param_grid, name_grid, name, grid_clf, X_, y_, cv ) + model_save_path = create_save_path( + self.hdf5_path, + dataset.code, + subject, + session, + name, + grid=False, + eval_type="WithinSession", + ) if isinstance(X, BaseEpochs): scorer = get_scorer(self.paradigm.scoring) acc = list() X_ = X[ix] y_ = y[ix] if self.mne_labels else y_cv - for train, test in cv.split(X_, y_): + for cv_ind, (train, test) in enumerate(cv.split(X_, y_)): cvclf = clone(grid_clf) cvclf.fit(X_[train], y_[train]) acc.append(scorer(cvclf, X_[test], y_[test])) + save_model( + model=cvclf, save_path=model_save_path, cv_index=cv_ind + ) acc = np.array(acc) score = acc.mean() else: @@ -268,18 +278,12 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): duration = time() - t_start if self.hdf5_path is not None: - model_save_path = create_save_path( - self.hdf5_path, - dataset.code, - subject, - session, - name, - grid=False, - eval_type="WithinSession", + save_model_list( + results["estimator"], + score_list=results["test_score"], + 
save_path=model_save_path, ) - save_model_list(results["estimator"], save_path=model_save_path) - nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { "time": duration / 5.0, # 5 fold CV @@ -522,7 +526,7 @@ def evaluate(self, dataset, pipelines, param_grid): results = [] for result in Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( delayed(self.process_subject)(subject, param_grid, pipelines, dataset) - for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-CrossSession") + for subject in dataset.subject_list ): results.extend(result) @@ -581,7 +585,17 @@ def process_subject(self, subject, param_grid, pipelines, dataset): if emissions_grid is None: emissions_grid = 0 - for train, test in cv.split(X, y, groups): + model_save_path = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=False, + eval_type="CrossSession", + ) + + for cv_ind, (train, test) in enumerate(cv.split(X, y, groups)): model_list = [] if _carbonfootprint: tracker.start() @@ -591,6 +605,11 @@ def process_subject(self, subject, param_grid, pipelines, dataset): cvclf.fit(X[train], y[train]) model_list.append(cvclf) score = scorer(cvclf, X[test], y[test]) + + if self.hdf5_path is not None: + save_model( + model=cvclf, save_path=model_save_path, cv_index=cv_ind + ) else: result = _fit_and_score( clone(grid_clf), @@ -614,18 +633,10 @@ def process_subject(self, subject, param_grid, pipelines, dataset): duration = time() - t_start if self.hdf5_path is not None: - model_save_path = create_save_path( - hdf5_path=self.hdf5_path, - code=dataset.code, - subject=subject, - session="", - name=name, - grid=False, - eval_type="CrossSession", + save_model_list( + model_list=model_list, score_list=score, save_path=model_save_path ) - save_model_list(model_list=model_list, save_path=model_save_path) - nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1] res = { "time": duration, diff --git 
a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index 6403fa1ac..f5f8d376f 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -1,7 +1,9 @@ from os import makedirs from pathlib import Path +from typing import Sequence from joblib import dump +from numpy import argmax def save_model(model, save_path: str, cv_index: int): @@ -26,7 +28,7 @@ def save_model(model, save_path: str, cv_index: int): return dump(model, Path(save_path) / f"fitted_model_{cv_index}.pkl") -def save_model_list(model_list: list, save_path: str): +def save_model_list(model_list: list, score_list: Sequence, save_path: str): """ Save a list of models fitted to a folder Parameters @@ -46,6 +48,12 @@ def save_model_list(model_list: list, save_path: str): model, Path(save_path) / f"fitted_model_cv_{str(i)}.pkl", ) + # Saving the best model + best_model = model_list[argmax(score_list)] + dump( + best_model, + Path(save_path) / "best_model.pkl", + ) def create_save_path( From 5e3252fb5bfd524f7c66e2084d014c1b9db78ca7 Mon Sep 17 00:00:00 2001 From: CARRARA Igor Date: Tue, 20 Jun 2023 12:10:59 +0200 Subject: [PATCH 11/30] Solving Parallel and Saving Model --- moabb/benchmark.py | 8 +++++++- moabb/evaluations/evaluations.py | 23 ++++++++++++----------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/moabb/benchmark.py b/moabb/benchmark.py index b90430d2f..514e09ea7 100644 --- a/moabb/benchmark.py +++ b/moabb/benchmark.py @@ -40,6 +40,7 @@ def benchmark( # noqa: C901 overwrite=False, output="./benchmark/", n_jobs=-1, + n_jobs_evaluation=1, plot=False, contexts=None, include_datasets=None, @@ -85,6 +86,9 @@ def benchmark( # noqa: C901 Folder to store the analysis results n_jobs: int Number of threads to use for running parallel jobs + n_jobs_evaluation: int, default=1 + Number of jobs for evaluation, processing in parallel the within session, + cross-session or cross-subject. 
plot: bool Plot results after computing contexts: str @@ -172,7 +176,8 @@ def benchmark( # noqa: C901 datasets=d, random_state=42, hdf5_path=results, - n_jobs=1, + n_jobs=n_jobs, + n_jobs_evaluation=n_jobs_evaluation, overwrite=overwrite, return_epochs=True, ) @@ -192,6 +197,7 @@ def benchmark( # noqa: C901 random_state=42, hdf5_path=results, n_jobs=n_jobs, + n_jobs_evaluation=n_jobs_evaluation, overwrite=overwrite, ) paradigm_results = context.process( diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index 882490182..fafd68f16 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -15,7 +15,6 @@ LeaveOneGroupOut, StratifiedKFold, StratifiedShuffleSplit, - cross_val_score, cross_validate, ) from sklearn.model_selection._validation import _fit_and_score, _score @@ -24,7 +23,7 @@ from moabb.evaluations.base import BaseEvaluation from moabb.evaluations.utils import create_save_path, save_model, save_model_list - +from sklearn.utils import parallel_backend try: from codecarbon import EmissionsTracker @@ -180,10 +179,11 @@ def _grid_search(self, param_grid, name_grid, name, grid_clf, X_, y_, cv): # flake8: noqa: C901 def _evaluate(self, dataset, pipelines, param_grid): - results = Parallel(n_jobs=self.n_jobs_evaluation, verbose=1, backend="loky")( - delayed(self._evaluate_subject)(dataset, pipelines, param_grid, subject) - for subject in dataset.subject_list - ) + with parallel_backend('threading'): + results = Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( + delayed(self._evaluate_subject)(dataset, pipelines, param_grid, subject) + for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-WithinSession") + ) # Concatenate the results from all subjects yield from [res for subject_results in results for res in subject_results] @@ -524,11 +524,12 @@ def evaluate(self, dataset, pipelines, param_grid): raise AssertionError("Dataset is not appropriate for evaluation") # Progressbar at subject level 
results = [] - for result in Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( - delayed(self.process_subject)(subject, param_grid, pipelines, dataset) - for subject in dataset.subject_list - ): - results.extend(result) + with parallel_backend('threading'): + for result in Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( + delayed(self.process_subject)(subject, param_grid, pipelines, dataset) + for subject in dataset.subject_list + ): + results.extend(result) return results From d1e3d00d2f36971c9987f0415935843dfe52dce0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Jun 2023 10:11:44 +0000 Subject: [PATCH 12/30] [pre-commit.ci] auto fixes from pre-commit.com hooks --- moabb/evaluations/evaluations.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index fafd68f16..a57fd85de 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -19,11 +19,12 @@ ) from sklearn.model_selection._validation import _fit_and_score, _score from sklearn.preprocessing import LabelEncoder +from sklearn.utils import parallel_backend from tqdm import tqdm from moabb.evaluations.base import BaseEvaluation from moabb.evaluations.utils import create_save_path, save_model, save_model_list -from sklearn.utils import parallel_backend + try: from codecarbon import EmissionsTracker @@ -179,10 +180,12 @@ def _grid_search(self, param_grid, name_grid, name, grid_clf, X_, y_, cv): # flake8: noqa: C901 def _evaluate(self, dataset, pipelines, param_grid): - with parallel_backend('threading'): + with parallel_backend("threading"): results = Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( delayed(self._evaluate_subject)(dataset, pipelines, param_grid, subject) - for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-WithinSession") + for subject in tqdm( + dataset.subject_list, 
desc=f"{dataset.code}-WithinSession" + ) ) # Concatenate the results from all subjects @@ -524,7 +527,7 @@ def evaluate(self, dataset, pipelines, param_grid): raise AssertionError("Dataset is not appropriate for evaluation") # Progressbar at subject level results = [] - with parallel_backend('threading'): + with parallel_backend("threading"): for result in Parallel(n_jobs=self.n_jobs_evaluation, verbose=1)( delayed(self.process_subject)(subject, param_grid, pipelines, dataset) for subject in dataset.subject_list From 9d273c10807bc0ebe2db0daef51f35c2dfdd7560 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 12:15:31 +0200 Subject: [PATCH 13/30] Adding if to hdf5_path is None --- moabb/evaluations/utils.py | 46 ++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index f5f8d376f..3b3eefa38 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -57,7 +57,7 @@ def save_model_list(model_list: list, score_list: Sequence, save_path: str): def create_save_path( - hdf5_path: str, + hdf5_path, code: str, subject: int, session: str, @@ -91,27 +91,29 @@ def create_save_path( path_save: str The created save path. 
""" + if hdf5_path is not None: + if eval_type != "WithinSession": + session = "" - if eval_type != "WithinSession": - session = "" + if grid: + path_save = ( + Path(hdf5_path) + / f"GridSearch_{eval_type}" + / code + / f"{str(subject)}" + / str(session) + / str(name) + ) + else: + path_save = ( + Path(hdf5_path) + / f"Models_{eval_type}" + / code + / f"{str(subject)}" + / str(session) + / str(name) + ) - if grid: - path_save = ( - Path(hdf5_path) - / f"GridSearch_{eval_type}" - / code - / f"{str(subject)}" - / str(session) - / str(name) - ) + return str(path_save) else: - path_save = ( - Path(hdf5_path) - / f"Models_{eval_type}" - / code - / f"{str(subject)}" - / str(session) - / str(name) - ) - - return str(path_save) + print("No hdf5_path provided, models will not be saved.") From e17aacddbd231b9522c017453b018d8a28617172 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 13:03:41 +0200 Subject: [PATCH 14/30] Solving the new path --- .../advanced_examples/plot_grid_search_withinsession.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/advanced_examples/plot_grid_search_withinsession.py b/examples/advanced_examples/plot_grid_search_withinsession.py index afa8f24a1..058c32703 100644 --- a/examples/advanced_examples/plot_grid_search_withinsession.py +++ b/examples/advanced_examples/plot_grid_search_withinsession.py @@ -39,7 +39,7 @@ ) # Create a path and folder for every subject -path = os.path.join(str("Results")) +path = os.path.join(str("/home/bru/PycharmProjects/moabb/result_ver")) os.makedirs(path, exist_ok=True) ############################################################################## @@ -145,7 +145,7 @@ path, "GridSearch_WithinSession", "001-2014", - "subject1", + "1", "session_E", "GridSearchEN", "Grid_Search_WithinSession.pkl", @@ -165,7 +165,7 @@ path, "GridSearch_WithinSession", "001-2014", - "subject1", + "1", "session_T", "GridSearchEN", "Grid_Search_WithinSession.pkl", From 
db3f4b953a6b173fbd8f7d7b5fe6f9a90b10b034 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 13:04:15 +0200 Subject: [PATCH 15/30] Adding new ifs --- moabb/evaluations/evaluations.py | 48 ++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index a57fd85de..dfd5d18b2 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -238,15 +238,16 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): grid_clf = self._grid_search( param_grid, name_grid, name, grid_clf, X_, y_, cv ) - model_save_path = create_save_path( - self.hdf5_path, - dataset.code, - subject, - session, - name, - grid=False, - eval_type="WithinSession", - ) + if self.hdf5_path is not None: + model_save_path = create_save_path( + self.hdf5_path, + dataset.code, + subject, + session, + name, + grid=False, + eval_type="WithinSession", + ) if isinstance(X, BaseEpochs): scorer = get_scorer(self.paradigm.scoring) @@ -257,9 +258,12 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): cvclf = clone(grid_clf) cvclf.fit(X_[train], y_[train]) acc.append(scorer(cvclf, X_[test], y_[test])) - save_model( - model=cvclf, save_path=model_save_path, cv_index=cv_ind - ) + + if self.hdf5_path is not None: + save_model( + model=cvclf, save_path=model_save_path, cv_index=cv_ind + ) + acc = np.array(acc) score = acc.mean() else: @@ -589,15 +593,16 @@ def process_subject(self, subject, param_grid, pipelines, dataset): if emissions_grid is None: emissions_grid = 0 - model_save_path = create_save_path( - hdf5_path=self.hdf5_path, - code=dataset.code, - subject=subject, - session="", - name=name, - grid=False, - eval_type="CrossSession", - ) + if self.hdf5_path is not None: + model_save_path = create_save_path( + hdf5_path=self.hdf5_path, + code=dataset.code, + subject=subject, + session="", + name=name, + grid=False, + 
eval_type="CrossSession", + ) for cv_ind, (train, test) in enumerate(cv.split(X, y, groups)): model_list = [] @@ -778,6 +783,7 @@ def evaluate(self, dataset, pipelines, param_grid): for name, clf in pipelines.items(): if _carbonfootprint: tracker.start() + name_grid = os.path.join( str(self.hdf5_path), "GridSearch_CrossSubject", dataset.code, name ) From 057367c84fede22d4d341618c455e894e14fdab4 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 13:40:34 +0200 Subject: [PATCH 16/30] Returning the Results folder --- examples/advanced_examples/plot_grid_search_withinsession.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/advanced_examples/plot_grid_search_withinsession.py b/examples/advanced_examples/plot_grid_search_withinsession.py index 058c32703..85a15dd44 100644 --- a/examples/advanced_examples/plot_grid_search_withinsession.py +++ b/examples/advanced_examples/plot_grid_search_withinsession.py @@ -39,7 +39,7 @@ ) # Create a path and folder for every subject -path = os.path.join(str("/home/bru/PycharmProjects/moabb/result_ver")) +path = os.path.join(str("Results")) os.makedirs(path, exist_ok=True) ############################################################################## From 5f449620dda7786c9cb4847bd85f65f0c610407e Mon Sep 17 00:00:00 2001 From: CARRARA Igor Date: Tue, 20 Jun 2023 14:55:47 +0200 Subject: [PATCH 17/30] Solve Saved model on Pytorch --- moabb/evaluations/utils.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index 3b3eefa38..132a1712d 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -1,8 +1,8 @@ from os import makedirs from pathlib import Path +from pickle import dump from typing import Sequence -from joblib import dump from numpy import argmax @@ -25,7 +25,9 @@ def save_model(model, save_path: str, cv_index: int): """ # Save the model makedirs(save_path, 
exist_ok=True) - return dump(model, Path(save_path) / f"fitted_model_{cv_index}.pkl") + with open((Path(save_path) / f"fitted_model_{cv_index}.pkl"), "wb") as f: + dump(model, f) + return def save_model_list(model_list: list, score_list: Sequence, save_path: str): @@ -44,16 +46,18 @@ def save_model_list(model_list: list, score_list: Sequence, save_path: str): # Save the result makedirs(save_path, exist_ok=True) for i, model in enumerate(model_list): - dump( - model, - Path(save_path) / f"fitted_model_cv_{str(i)}.pkl", - ) + with open((Path(save_path) / f"fitted_model_cv_{str(i)}.pkl"), "wb") as f: + dump( + model, + f, + ) # Saving the best model best_model = model_list[argmax(score_list)] - dump( - best_model, - Path(save_path) / "best_model.pkl", - ) + with open((Path(save_path) / "best_model.pkl"), "wb") as f: + dump( + best_model, + f, + ) def create_save_path( @@ -67,7 +71,6 @@ def create_save_path( ): """ Create a save path based on evaluation parameters. - Parameters ---------- hdf5_path : str @@ -85,7 +88,6 @@ def create_save_path( eval_type : str, optional The type of evaluation, either 'WithinSession', 'CrossSession' or 'CrossSubject'. Defaults to WithinSession. 
- Returns ------- path_save: str From 90feff4222a749a0cd62473e2e0ce8ae7b037128 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 20:01:27 +0200 Subject: [PATCH 18/30] Removing Keras models saving --- moabb/evaluations/utils.py | 42 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index 3b3eefa38..bd187a741 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -6,6 +6,29 @@ from numpy import argmax +def _check_if_is_keras_model(model): + """ + Check if the model is a Keras model + Parameters + ---------- + model: object + Model to check + Returns + ------- + is_keras_model: bool + True if the model is a Keras model + """ + try: + from scikeras.wrappers import KerasClassifier + + is_keras_model = isinstance(model, KerasClassifier) + return is_keras_model + except ImportError: + return False + + return False + + def save_model(model, save_path: str, cv_index: int): """ Save a model fitted to a folder @@ -24,8 +47,12 @@ def save_model(model, save_path: str, cv_index: int): List of filenames where the model is saved """ # Save the model - makedirs(save_path, exist_ok=True) - return dump(model, Path(save_path) / f"fitted_model_{cv_index}.pkl") + if any(_check_if_is_keras_model(step) for step in model.named_steps.values()): + print("Keras models are not supported for saving yet.") + return + else: + makedirs(save_path, exist_ok=True) + return dump(model, Path(save_path) / f"fitted_model_{cv_index}.pkl") def save_model_list(model_list: list, score_list: Sequence, save_path: str): @@ -41,7 +68,18 @@ def save_model_list(model_list: list, score_list: Sequence, save_path: str): Returns ------- """ + if model_list is None: + return # Save the result + + if any( + _check_if_is_keras_model(step) + for model in model_list + for step in model.named_steps.values() + ): + print("Keras models are not supported for saving yet.") + return + 
makedirs(save_path, exist_ok=True) for i, model in enumerate(model_list): dump( From f383e1476e2b2b8f7eeb1b9d0a05a4dacaa5c230 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Tue, 20 Jun 2023 20:56:35 +0200 Subject: [PATCH 19/30] Updating model_check --- moabb/evaluations/utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index 381591544..9b6bfe0fe 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -72,14 +72,20 @@ def save_model_list(model_list: list, score_list: Sequence, save_path: str): if model_list is None: return # Save the result - - if any( - _check_if_is_keras_model(step) - for model in model_list - for step in model.named_steps.values() - ): - print("Keras models are not supported for saving yet.") - return + if not isinstance(model_list, list): + if any( + _check_if_is_keras_model(step) for step in model_list.named_steps.values() + ): + print("Keras models are not supported for saving yet.") + return + else: + if any( + _check_if_is_keras_model(step) + for model in model_list + for step in model.named_steps.values() + ): + print("Keras models are not supported for saving yet.") + return makedirs(save_path, exist_ok=True) for i, model in enumerate(model_list): From 0365ea36fd1f06253ae2b2dc85015b036b0148a9 Mon Sep 17 00:00:00 2001 From: CARRARA Igor Date: Wed, 21 Jun 2023 12:02:02 +0200 Subject: [PATCH 20/30] Updated Saved model in Pytorch, second methodology if is a Skorch model --- moabb/evaluations/utils.py | 65 ++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index 9b6bfe0fe..f1845b67b 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -27,6 +27,27 @@ def _check_if_is_keras_model(model): return False +def _check_if_is_pytorch_model(model): + """ + Check if the model is a Keras model + 
Parameters + ---------- + model: object + Model to check + Returns + ------- + is_keras_model: bool + True if the model is a Keras model + """ + try: + from skorch import NeuralNetClassifier + + is_pytorch_model = isinstance(model, NeuralNetClassifier) + return is_pytorch_model + except ImportError: + return False + + def save_model(model, save_path: str, cv_index: int): """ Save a model fitted to a folder @@ -88,19 +109,45 @@ def save_model_list(model_list: list, score_list: Sequence, save_path: str): return makedirs(save_path, exist_ok=True) + for i, model in enumerate(model_list): - with open((Path(save_path) / f"fitted_model_cv_{str(i)}.pkl"), "wb") as f: + if any(_check_if_is_pytorch_model(step) for step in model.named_steps.values()): + from skorch import NeuralNetClassifier + + for step in model.named_steps.values(): + if isinstance(step, NeuralNetClassifier): + step.save_params( + f_params=Path(save_path) / f"fitted_model_cv_{str(i)}.pkl", + f_optimizer=Path(save_path) / f"opt_cv_{str(i)}.pkl", + f_history=Path(save_path) / f"history_cv_{str(i)}.json", + ) + + else: + with open((Path(save_path) / f"fitted_model_cv_{str(i)}.pkl"), "wb") as f: + dump( + model, + f, + ) + # Saving the best model + best_model = model_list[argmax(score_list)] + + if any(_check_if_is_pytorch_model(step) for step in best_model.named_steps.values()): + from skorch import NeuralNetClassifier + + for step in best_model.named_steps.values(): + if isinstance(step, NeuralNetClassifier): + step.save_params( + f_params=Path(save_path) / "best_model.pkl", + f_optimizer=Path(save_path) / "best_opt.pkl", + f_history=Path(save_path) / "best_history.json", + ) + + else: + with open((Path(save_path) / "best_model.pkl"), "wb") as f: dump( - model, + best_model, f, ) - # Saving the best model - best_model = model_list[argmax(score_list)] - with open((Path(save_path) / "best_model.pkl"), "wb") as f: - dump( - best_model, - f, - ) def create_save_path( From df6e0d107a2a28f614ff28daac9154946ab6e37d 
Mon Sep 17 00:00:00 2001 From: CARRARA Igor Date: Wed, 21 Jun 2023 14:54:23 +0200 Subject: [PATCH 21/30] Added Saved Model on Keras and Pytorch --- moabb/evaluations/utils.py | 101 ++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 29 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index f1845b67b..c332b7602 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -66,17 +66,33 @@ def save_model(model, save_path: str, cv_index: int): List of filenames where the model is saved """ # Save the model - if any(_check_if_is_keras_model(step) for step in model.named_steps.values()): - print("Keras models are not supported for saving yet.") - return - else: - makedirs(save_path, exist_ok=True) + makedirs(save_path, exist_ok=True) + + if any(_check_if_is_pytorch_model(j) for j in model.named_steps.values()): + from skorch import NeuralNetClassifier + + for step in model.named_steps.values(): + if isinstance(step, NeuralNetClassifier): + step.save_params( + f_params=Path(save_path) / f"fitted_model_cv_{str(cv_index)}.pkl", + f_optimizer=Path(save_path) / f"opt_cv_{str(cv_index)}.pkl", + f_history=Path(save_path) / f"history_cv_{str(cv_index)}.json", + ) + + elif any(_check_if_is_keras_model(j) for j in model.named_steps.values()): + from scikeras.wrappers import KerasClassifier + for step in model.named_steps.values(): + if isinstance(step, KerasClassifier): + step.model_.save(Path(save_path) / f"fitted_model_cv_{str(cv_index)}") + + else: with open((Path(save_path) / f"fitted_model_{cv_index}.pkl"), "wb") as f: dump(model, f) return +# flake8: noqa: C901 def save_model_list(model_list: list, score_list: Sequence, save_path: str): """ Save a list of models fitted to a folder @@ -93,45 +109,65 @@ def save_model_list(model_list: list, score_list: Sequence, save_path: str): if model_list is None: return # Save the result - if not isinstance(model_list, list): - if any( - _check_if_is_keras_model(step) for 
step in model_list.named_steps.values() - ): - print("Keras models are not supported for saving yet.") - return - else: - if any( - _check_if_is_keras_model(step) - for model in model_list - for step in model.named_steps.values() - ): - print("Keras models are not supported for saving yet.") - return - makedirs(save_path, exist_ok=True) - for i, model in enumerate(model_list): - if any(_check_if_is_pytorch_model(step) for step in model.named_steps.values()): + if not isinstance(model_list, list): + if any(_check_if_is_pytorch_model(j) for j in model_list.named_steps.values()): from skorch import NeuralNetClassifier - for step in model.named_steps.values(): + for step in model_list.named_steps.values(): if isinstance(step, NeuralNetClassifier): step.save_params( - f_params=Path(save_path) / f"fitted_model_cv_{str(i)}.pkl", - f_optimizer=Path(save_path) / f"opt_cv_{str(i)}.pkl", - f_history=Path(save_path) / f"history_cv_{str(i)}.json", + f_params=Path(save_path) / "fitted_model.pkl", + f_optimizer=Path(save_path) / "opt.pkl", + f_history=Path(save_path) / "history.json", ) + elif any(_check_if_is_keras_model(j) for j in model_list.named_steps.values()): + from scikeras.wrappers import KerasClassifier + + for step in model_list.named_steps.values(): + if isinstance(step, KerasClassifier): + step.model_.save(Path(save_path) / "fitted_model") + else: - with open((Path(save_path) / f"fitted_model_cv_{str(i)}.pkl"), "wb") as f: + with open((Path(save_path) / "fitted_model.pkl"), "wb") as f: dump( - model, + model_list, f, ) + + else: + for i, model in enumerate(model_list): + if any(_check_if_is_pytorch_model(j) for j in model.named_steps.values()): + from skorch import NeuralNetClassifier + + for step in model.named_steps.values(): + if isinstance(step, NeuralNetClassifier): + step.save_params( + f_params=Path(save_path) / f"fitted_model_cv_{str(i)}.pkl", + f_optimizer=Path(save_path) / f"opt_cv_{str(i)}.pkl", + f_history=Path(save_path) / f"history_cv_{str(i)}.json", + 
) + + elif any(_check_if_is_keras_model(j) for j in model.named_steps.values()): + from scikeras.wrappers import KerasClassifier + + for step in model.named_steps.values(): + if isinstance(step, KerasClassifier): + step.model_.save(Path(save_path) / f"fitted_model_cv_{str(i)}") + + else: + with open((Path(save_path) / f"fitted_model_cv_{str(i)}.pkl"), "wb") as f: + dump( + model, + f, + ) + # Saving the best model best_model = model_list[argmax(score_list)] - if any(_check_if_is_pytorch_model(step) for step in best_model.named_steps.values()): + if any(_check_if_is_pytorch_model(j) for j in best_model.named_steps.values()): from skorch import NeuralNetClassifier for step in best_model.named_steps.values(): @@ -142,6 +178,13 @@ def save_model_list(model_list: list, score_list: Sequence, save_path: str): f_history=Path(save_path) / "best_history.json", ) + elif any(_check_if_is_keras_model(j) for j in best_model.named_steps.values()): + from scikeras.wrappers import KerasClassifier + + for step in best_model.named_steps.values(): + if isinstance(step, KerasClassifier): + step.model_.save(Path(save_path) / "best_model") + else: with open((Path(save_path) / "best_model.pkl"), "wb") as f: dump( From ef6afff66f969e7074c7e23f883d82e0d0b33981 Mon Sep 17 00:00:00 2001 From: CARRARA Igor Date: Wed, 21 Jun 2023 16:04:13 +0200 Subject: [PATCH 22/30] Example Load model --- docs/source/whats_new.rst | 1 + examples/plot_load_model.py | 121 ++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 examples/plot_load_model.py diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index 78fa05867..edc099ada 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -26,6 +26,7 @@ Enhancements - Adding Parallel evaluation for :func:`moabb.evaluations.WithinSessionEvaluation` , :func:`moabb.evaluations.CrossSessionEvaluation` (:gh:`364` by `Bruno Aristimunha`_) - Add example with VirtualReality BrainInvaders dataset 
(:gh:`393` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_) - Adding saving option for the models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) +- Adding example to load different type of models (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) Bugs ~~~~ diff --git a/examples/plot_load_model.py b/examples/plot_load_model.py new file mode 100644 index 000000000..ea611f8e9 --- /dev/null +++ b/examples/plot_load_model.py @@ -0,0 +1,121 @@ +""" +============================================== +Load Model (Scikit, Pytorch, Keras) with MOABB +============================================== + +This example shows how to use load the pretrained pipeline in MOABB. +""" +# Authors: Igor Carrara +# +# License: BSD (3-clause) + +from pickle import load + +import keras +import torch +from braindecode import EEGClassifier +from braindecode.models import EEGInception +from scikeras.wrappers import KerasClassifier +from sklearn.pipeline import Pipeline +from skorch.callbacks import EarlyStopping, EpochScoring +from skorch.dataset import ValidSplit + +from moabb import set_log_level +from moabb.pipelines.features import StandardScaler_Epoch +from moabb.pipelines.utils_pytorch import BraindecodeDatasetLoader, InputShapeSetterEEG +from moabb.utils import setup_seed + + +set_log_level("info") + +############################################################################### +# In this example, we will use the results computed by the following examples +# +# - plot_benchmark_ +# - plot_benchmark_braindecode_ +# - plot_benchmark_DL_ +# --------------------- + +# Set up reproducibility of Tensorflow and PyTorch +setup_seed(42) + +############################################################################### +# Loading the Scikit-learn pipelines + +with open( + "./results/Models_WithinSession/Zhou 2016/1/session_0/CSP + SVM/best_model.pkl", "rb" +) as pickle_file: + CSP_SVM_Trained = load(pickle_file) + 
+############################################################################### +# Loading the Keras model +# We load the single Keras model, if we want we can set in the exact same pipeline. + +model_Keras = keras.models.load_model( + "./results/Models_WithinSession/001-2014/1/session_E/Keras_DeepConvNet/best_model" +) +# Now we need to instantiate a new SciKeras object since we only saved the Keras model +Keras_DeepConvNet_Trained = KerasClassifier(model_Keras) +# Create the pipelines + + +pipes_keras = Pipeline( + [ + ("StandardScaler_Epoch", StandardScaler_Epoch), + ("Keras_DeepConvNet_Trained", Keras_DeepConvNet_Trained), + ] +) + + +############################################################################### +# Loading the PyTorch model + +# Set EEG Inception model +model = EEGInception(in_channels=22, n_classes=2) + +# Hyperparameter +LEARNING_RATE = 0.0001 +WEIGHT_DECAY = 0 +BATCH_SIZE = 64 +SEED = 42 +VERBOSE = 1 +EPOCH = 2 +PATIENCE = 3 + +# Define a Skorch classifier +clf = EEGClassifier( + module=model, + criterion=torch.nn.CrossEntropyLoss, + optimizer=torch.optim.Adam, + optimizer__lr=LEARNING_RATE, + batch_size=BATCH_SIZE, + max_epochs=EPOCH, + train_split=ValidSplit(0.2, random_state=SEED), + callbacks=[ + EarlyStopping(monitor="valid_loss", patience=PATIENCE), + EpochScoring( + scoring="accuracy", on_train=True, name="train_acc", lower_is_better=False + ), + EpochScoring( + scoring="accuracy", on_train=False, name="valid_acc", lower_is_better=False + ), + InputShapeSetterEEG( + params_list=["in_channels", "input_window_samples", "n_classes"], + ), + ], + verbose=VERBOSE, # Not printing the results for each epoch +) + +clf.initialize() + +clf.load_params( + f_params="./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/best_model.pkl", + f_optimizer="./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/best_opt.pkl", + f_history="./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/best_history.json", +) + 
+# Create the dataset +create_dataset = BraindecodeDatasetLoader(drop_last_window=False) + +# Create the pipelines +pipes_pytorch = Pipeline([("Braindecode_dataset", create_dataset), ("EEGInception", clf)]) From 161aad10334c1be0f658bf1be88be9e371e59fea Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Wed, 21 Jun 2023 23:19:49 +0200 Subject: [PATCH 23/30] Updating the save model, optimizing the code --- moabb/evaluations/utils.py | 165 ++++++++++++++----------------------- 1 file changed, 61 insertions(+), 104 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index c332b7602..95e9046ad 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -1,9 +1,11 @@ -from os import makedirs +from __future__ import annotations + from pathlib import Path -from pickle import dump +from pickle import HIGHEST_PROTOCOL, dump from typing import Sequence from numpy import argmax +from sklearn.pipeline import Pipeline def _check_if_is_keras_model(model): @@ -48,7 +50,7 @@ def _check_if_is_pytorch_model(model): return False -def save_model(model, save_path: str, cv_index: int): +def save_model_cv(model, save_path, cv_index): """ Save a model fitted to a folder Parameters @@ -58,48 +60,74 @@ def save_model(model, save_path: str, cv_index: int): save_path: str Path to save the model, will create if it does not exist based on the parameter hdf5_path from the evaluation object. 
- cv_index: int + cv_index: str Index of the cross-validation fold used to fit the model + or 'best' if the model is the best fitted + Returns ------- - filenames: list - List of filenames where the model is saved - """ - # Save the model - makedirs(save_path, exist_ok=True) + """ if any(_check_if_is_pytorch_model(j) for j in model.named_steps.values()): - from skorch import NeuralNetClassifier + for step_name, step in model.named_steps.item(): + file_step = f"{step_name}_fitted_cv_{cv_index}" - for step in model.named_steps.values(): - if isinstance(step, NeuralNetClassifier): + if _check_if_is_pytorch_model(step): step.save_params( - f_params=Path(save_path) / f"fitted_model_cv_{str(cv_index)}.pkl", - f_optimizer=Path(save_path) / f"opt_cv_{str(cv_index)}.pkl", - f_history=Path(save_path) / f"history_cv_{str(cv_index)}.json", + f_params=Path(save_path) / f"{file_step}_model.pkl", + f_optimizer=Path(save_path) / f"{file_step}_optim.pkl", + f_history=Path(save_path) / f"{file_step}_history.json", + f_criterion=Path(save_path) / f"{file_step}_criterion.pkl", ) + else: + with open((Path(save_path) / f"{file_step}.pkl"), "wb") as file: + dump(model, file, protocol=HIGHEST_PROTOCOL) elif any(_check_if_is_keras_model(j) for j in model.named_steps.values()): - from scikeras.wrappers import KerasClassifier + for step_name, step in model.named_steps.item(): + file_step = f"{step_name}_fitted_model_cv_{cv_index}" + if _check_if_is_keras_model(step): + step.model_.save(Path(save_path) / f"{file_step}.h5") + else: + with open((Path(save_path) / f"{file_step}.pkl"), "wb") as file: + dump(model, file, protocol=HIGHEST_PROTOCOL) + else: + with open((Path(save_path) / f"fitted_model_{cv_index}.pkl"), "wb") as file: + dump(model, file, protocol=HIGHEST_PROTOCOL) - for step in model.named_steps.values(): - if isinstance(step, KerasClassifier): - step.model_.save(Path(save_path) / f"fitted_model_cv_{str(cv_index)}") - else: - with open((Path(save_path) / 
f"fitted_model_{cv_index}.pkl"), "wb") as f: - dump(model, f) - return +def save_model(model: list | Pipeline, save_path: str, cv_index: str): + """ + Save a model fitted to a folder + Parameters + ---------- + model: object + Model (pipeline) fitted + save_path: str + Path to save the model, will create if it does not exist + based on the parameter hdf5_path from the evaluation object. + cv_index: str + Index of the cross-validation fold used to fit the model + Returns + ------- + filenames: list + List of filenames where the model is saved + """ + # Save the model + Path(save_path).mkdir(parents=True, exist_ok=True) + + save_model_cv(model, save_path, cv_index) -# flake8: noqa: C901 -def save_model_list(model_list: list, score_list: Sequence, save_path: str): +def save_model_list(model_list: list | Pipeline, score_list: Sequence, save_path: str): """ Save a list of models fitted to a folder Parameters ---------- - model_list: list - List of models (pipelines) fitted + model_list: list | Pipeline + List of models or model (pipelines) fitted + score_list: Sequence + List of scores for each model in model_list save_path: str Path to save the models, will create if it does not exist based on the parameter hdf5_path from the evaluation object. 
@@ -108,89 +136,18 @@ def save_model_list(model_list: list, score_list: Sequence, save_path: str): """ if model_list is None: return - # Save the result - makedirs(save_path, exist_ok=True) - - if not isinstance(model_list, list): - if any(_check_if_is_pytorch_model(j) for j in model_list.named_steps.values()): - from skorch import NeuralNetClassifier - - for step in model_list.named_steps.values(): - if isinstance(step, NeuralNetClassifier): - step.save_params( - f_params=Path(save_path) / "fitted_model.pkl", - f_optimizer=Path(save_path) / "opt.pkl", - f_history=Path(save_path) / "history.json", - ) - - elif any(_check_if_is_keras_model(j) for j in model_list.named_steps.values()): - from scikeras.wrappers import KerasClassifier - - for step in model_list.named_steps.values(): - if isinstance(step, KerasClassifier): - step.model_.save(Path(save_path) / "fitted_model") - - else: - with open((Path(save_path) / "fitted_model.pkl"), "wb") as f: - dump( - model_list, - f, - ) - - else: - for i, model in enumerate(model_list): - if any(_check_if_is_pytorch_model(j) for j in model.named_steps.values()): - from skorch import NeuralNetClassifier - for step in model.named_steps.values(): - if isinstance(step, NeuralNetClassifier): - step.save_params( - f_params=Path(save_path) / f"fitted_model_cv_{str(i)}.pkl", - f_optimizer=Path(save_path) / f"opt_cv_{str(i)}.pkl", - f_history=Path(save_path) / f"history_cv_{str(i)}.json", - ) + Path(save_path).mkdir(parents=True, exist_ok=True) - elif any(_check_if_is_keras_model(j) for j in model.named_steps.values()): - from scikeras.wrappers import KerasClassifier - - for step in model.named_steps.values(): - if isinstance(step, KerasClassifier): - step.model_.save(Path(save_path) / f"fitted_model_cv_{str(i)}") + if not isinstance(model_list, list): + model_list = [model_list] - else: - with open((Path(save_path) / f"fitted_model_cv_{str(i)}.pkl"), "wb") as f: - dump( - model, - f, - ) + for cv_index, model in enumerate(model_list): + 
save_model_cv(model, save_path, str(cv_index)) - # Saving the best model best_model = model_list[argmax(score_list)] - if any(_check_if_is_pytorch_model(j) for j in best_model.named_steps.values()): - from skorch import NeuralNetClassifier - - for step in best_model.named_steps.values(): - if isinstance(step, NeuralNetClassifier): - step.save_params( - f_params=Path(save_path) / "best_model.pkl", - f_optimizer=Path(save_path) / "best_opt.pkl", - f_history=Path(save_path) / "best_history.json", - ) - - elif any(_check_if_is_keras_model(j) for j in best_model.named_steps.values()): - from scikeras.wrappers import KerasClassifier - - for step in best_model.named_steps.values(): - if isinstance(step, KerasClassifier): - step.model_.save(Path(save_path) / "best_model") - - else: - with open((Path(save_path) / "best_model.pkl"), "wb") as f: - dump( - best_model, - f, - ) + save_model_cv(best_model, save_path, "best") def create_save_path( From 3dab7731cb5f1746eea74b12b761a92c2c8f9f89 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Thu, 22 Jun 2023 00:07:42 +0200 Subject: [PATCH 24/30] Fixing saving function --- moabb/evaluations/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index 95e9046ad..c8725a556 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -69,7 +69,8 @@ def save_model_cv(model, save_path, cv_index): """ if any(_check_if_is_pytorch_model(j) for j in model.named_steps.values()): - for step_name, step in model.named_steps.item(): + for step_name in model.named_steps: + step = model.named_steps[step_name] file_step = f"{step_name}_fitted_cv_{cv_index}" if _check_if_is_pytorch_model(step): @@ -84,8 +85,9 @@ def save_model_cv(model, save_path, cv_index): dump(model, file, protocol=HIGHEST_PROTOCOL) elif any(_check_if_is_keras_model(j) for j in model.named_steps.values()): - for step_name, step in model.named_steps.item(): + for step_name in 
model.named_steps: file_step = f"{step_name}_fitted_model_cv_{cv_index}" + step = model.named_steps[step_name] if _check_if_is_keras_model(step): step.model_.save(Path(save_path) / f"{file_step}.h5") else: From 4511edc665eb151e36ee7d3e6a37f757ecaaea52 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Thu, 22 Jun 2023 00:09:35 +0200 Subject: [PATCH 25/30] renaming model to step --- moabb/evaluations/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index c8725a556..396ba7a65 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -82,7 +82,7 @@ def save_model_cv(model, save_path, cv_index): ) else: with open((Path(save_path) / f"{file_step}.pkl"), "wb") as file: - dump(model, file, protocol=HIGHEST_PROTOCOL) + dump(step, file, protocol=HIGHEST_PROTOCOL) elif any(_check_if_is_keras_model(j) for j in model.named_steps.values()): for step_name in model.named_steps: @@ -92,7 +92,7 @@ def save_model_cv(model, save_path, cv_index): step.model_.save(Path(save_path) / f"{file_step}.h5") else: with open((Path(save_path) / f"{file_step}.pkl"), "wb") as file: - dump(model, file, protocol=HIGHEST_PROTOCOL) + dump(step, file, protocol=HIGHEST_PROTOCOL) else: with open((Path(save_path) / f"fitted_model_{cv_index}.pkl"), "wb") as file: dump(model, file, protocol=HIGHEST_PROTOCOL) From e16529b5ca0e498f29363b89aa52ffcc60c69cc3 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Thu, 22 Jun 2023 00:27:54 +0200 Subject: [PATCH 26/30] Updating the tutorial --- examples/plot_load_model.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/plot_load_model.py b/examples/plot_load_model.py index ea611f8e9..178edfb17 100644 --- a/examples/plot_load_model.py +++ b/examples/plot_load_model.py @@ -43,7 +43,8 @@ # Loading the Scikit-learn pipelines with open( - "./results/Models_WithinSession/Zhou 2016/1/session_0/CSP + SVM/best_model.pkl", "rb" + 
"./results/Models_WithinSession/Zhou 2016/1/session_0/CSP + SVM/fitted_model_best.pkl", + "rb", ) as pickle_file: CSP_SVM_Trained = load(pickle_file) @@ -52,7 +53,7 @@ # We load the single Keras model, if we want we can set in the exact same pipeline. model_Keras = keras.models.load_model( - "./results/Models_WithinSession/001-2014/1/session_E/Keras_DeepConvNet/best_model" + "./results/Models_WithinSession/001-2014/1/session_E/Keras_DeepConvNet/kerasdeepconvnet_fitted_model_cv_best.h5" ) # Now we need to instantiate a new SciKeras object since we only saved the Keras model Keras_DeepConvNet_Trained = KerasClassifier(model_Keras) @@ -108,11 +109,12 @@ clf.initialize() -clf.load_params( - f_params="./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/best_model.pkl", - f_optimizer="./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/best_opt.pkl", - f_history="./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/best_history.json", -) +f_params = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_cv_best_model.pkl" +f_optimizer = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_cv_best_optim.pkl" +f_history = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_cv_best_history.json" + +clf.load_params(f_params=f_params, f_optimizer=f_optimizer, f_history=f_history) + # Create the dataset create_dataset = BraindecodeDatasetLoader(drop_last_window=False) From 6d1584f73cb25e334a1de85ccdf0d9cf43541e6d Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Thu, 22 Jun 2023 20:34:58 +0200 Subject: [PATCH 27/30] Updating the path --- examples/plot_load_model.py | 8 ++++---- moabb/evaluations/evaluations.py | 12 +++++++----- moabb/evaluations/utils.py | 31 +++++-------------------------- 3 files changed, 16 insertions(+), 35 deletions(-) diff --git a/examples/plot_load_model.py b/examples/plot_load_model.py index 
178edfb17..1a7b1c32c 100644 --- a/examples/plot_load_model.py +++ b/examples/plot_load_model.py @@ -53,7 +53,7 @@ # We load the single Keras model, if we want we can set in the exact same pipeline. model_Keras = keras.models.load_model( - "./results/Models_WithinSession/001-2014/1/session_E/Keras_DeepConvNet/kerasdeepconvnet_fitted_model_cv_best.h5" + "./results/Models_WithinSession/001-2014/1/session_E/Keras_DeepConvNet/kerasdeepconvnet_fitted_model_best.h5" ) # Now we need to instantiate a new SciKeras object since we only saved the Keras model Keras_DeepConvNet_Trained = KerasClassifier(model_Keras) @@ -109,9 +109,9 @@ clf.initialize() -f_params = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_cv_best_model.pkl" -f_optimizer = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_cv_best_optim.pkl" -f_history = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_cv_best_history.json" +f_params = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_model.pkl" +f_optimizer = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_optim.pkl" +f_history = "./results/Models_CrossSession/001-2014/1/braindecode_EEGInception/EEGInception_fitted_best_history.json" clf.load_params(f_params=f_params, f_optimizer=f_optimizer, f_history=f_history) diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py index dfd5d18b2..2e042cb10 100644 --- a/moabb/evaluations/evaluations.py +++ b/moabb/evaluations/evaluations.py @@ -23,7 +23,7 @@ from tqdm import tqdm from moabb.evaluations.base import BaseEvaluation -from moabb.evaluations.utils import create_save_path, save_model, save_model_list +from moabb.evaluations.utils import create_save_path, save_model_cv, save_model_list try: @@ -260,7 +260,7 @@ def _evaluate_subject(self, dataset, pipelines, param_grid, subject): 
acc.append(scorer(cvclf, X_[test], y_[test])) if self.hdf5_path is not None: - save_model( + save_model_cv( model=cvclf, save_path=model_save_path, cv_index=cv_ind ) @@ -616,8 +616,8 @@ def process_subject(self, subject, param_grid, pipelines, dataset): score = scorer(cvclf, X[test], y[test]) if self.hdf5_path is not None: - save_model( - model=cvclf, save_path=model_save_path, cv_index=cv_ind + save_model_cv( + model=cvclf, save_path=model_save_path, cv_index=str(cv_ind) ) else: result = _fit_and_score( @@ -831,7 +831,9 @@ def evaluate(self, dataset, pipelines, param_grid): eval_type="CrossSubject", ) - save_model(model=model, save_path=model_save_path, cv_index=cv_ind) + save_model_cv( + model=model, save_path=model_save_path, cv_index=str(cv_ind) + ) # we eval on each session for session in np.unique(sessions[test]): ix = sessions[test] == session diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index 396ba7a65..e88388f70 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -50,7 +50,7 @@ def _check_if_is_pytorch_model(model): return False -def save_model_cv(model, save_path, cv_index): +def save_model_cv(model: object, save_path: str | Path, cv_index: str | int): """ Save a model fitted to a folder Parameters @@ -68,10 +68,12 @@ def save_model_cv(model, save_path, cv_index): ------- """ + Path(save_path).mkdir(parents=True, exist_ok=True) + if any(_check_if_is_pytorch_model(j) for j in model.named_steps.values()): for step_name in model.named_steps: step = model.named_steps[step_name] - file_step = f"{step_name}_fitted_cv_{cv_index}" + file_step = f"{step_name}_fitted_{cv_index}" if _check_if_is_pytorch_model(step): step.save_params( @@ -86,7 +88,7 @@ def save_model_cv(model, save_path, cv_index): elif any(_check_if_is_keras_model(j) for j in model.named_steps.values()): for step_name in model.named_steps: - file_step = f"{step_name}_fitted_model_cv_{cv_index}" + file_step = f"{step_name}_fitted_model_{cv_index}" step 
= model.named_steps[step_name] if _check_if_is_keras_model(step): step.model_.save(Path(save_path) / f"{file_step}.h5") @@ -98,29 +100,6 @@ def save_model_cv(model, save_path, cv_index): dump(model, file, protocol=HIGHEST_PROTOCOL) -def save_model(model: list | Pipeline, save_path: str, cv_index: str): - """ - Save a model fitted to a folder - Parameters - ---------- - model: object - Model (pipeline) fitted - save_path: str - Path to save the model, will create if it does not exist - based on the parameter hdf5_path from the evaluation object. - cv_index: str - Index of the cross-validation fold used to fit the model - Returns - ------- - filenames: list - List of filenames where the model is saved - """ - # Save the model - Path(save_path).mkdir(parents=True, exist_ok=True) - - save_model_cv(model, save_path, cv_index) - - def save_model_list(model_list: list | Pipeline, score_list: Sequence, save_path: str): """ Save a list of models fitted to a folder From 9e4ed4cee557834fe9bb59356039bac390ab5fd7 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Thu, 22 Jun 2023 20:40:20 +0200 Subject: [PATCH 28/30] Adding new test and fix __init__.py --- moabb/evaluations/__init__.py | 2 +- moabb/tests/evaluations.py | 53 ++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/moabb/evaluations/__init__.py b/moabb/evaluations/__init__.py index b2af402e9..ec86b8e29 100644 --- a/moabb/evaluations/__init__.py +++ b/moabb/evaluations/__init__.py @@ -10,4 +10,4 @@ CrossSubjectEvaluation, WithinSessionEvaluation, ) -from .utils import create_save_path, save_model, save_model_list +from .utils import create_save_path, save_model_cv, save_model_list diff --git a/moabb/tests/evaluations.py b/moabb/tests/evaluations.py index d1bb782ed..65157f4b4 100644 --- a/moabb/tests/evaluations.py +++ b/moabb/tests/evaluations.py @@ -17,6 +17,7 @@ from moabb.analysis.results import get_string_rep from moabb.datasets.fake import FakeDataset from 
moabb.evaluations import evaluations as ev +from moabb.evaluations.utils import create_save_path, save_model_cv, save_model_list from moabb.paradigms.motor_imagery import FakeImageryParadigm @@ -27,7 +28,6 @@ except ImportError: _carbonfootprint = False - pipelines = OrderedDict() pipelines["C"] = make_pipeline(Covariances("oas"), CSP(8), LDA()) dataset = FakeDataset(["left_hand", "right_hand"], n_subjects=2) @@ -276,5 +276,56 @@ def test_compatible_dataset(self): self.assertTrue(self.eval.is_valid(dataset=ds)) +class UtilEvaluation(unittest.TestCase): + def test_save_model_cv(self): + model = DummyClassifier() + save_path = "test_save_path" + cv_index = 0 + save_model_cv(model, save_path, cv_index) + + # Assert that the saved model file exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))) + + def test_save_model_list(self): + model = DummyClassifier() + model_list = [model] + score_list = [0.8] + save_path = "test_save_path" + save_model_list(model_list, score_list, save_path) + + # Assert that the saved model file for best model exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))) + + def test_create_save_path(self): + hdf5_path = "base_path" + code = "evaluation_code" + subject = 1 + session = "session_0" + name = "evaluation_name" + eval_type = "WithinSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + expected_path = os.path.join( + hdf5_path, "Models_WithinSession", code, "1", "session_0", "evaluation_name" + ) + self.assertEqual(save_path, expected_path) + + grid_save_path = create_save_path( + hdf5_path, code, subject, session, name, grid=True, eval_type=eval_type + ) + + expected_grid_path = os.path.join( + hdf5_path, + "GridSearch_WithinSession", + code, + "1", + "session_0", + "evaluation_name", + ) + self.assertEqual(grid_save_path, expected_grid_path) + + if __name__ == "__main__": unittest.main() From 
f4a6abdeff61e14362d6c9c3da8173e2c91f1f69 Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Thu, 22 Jun 2023 21:12:09 +0200 Subject: [PATCH 29/30] Adding new tests --- moabb/evaluations/utils.py | 31 ++++++- moabb/tests/evaluations.py | 172 ++++++++++++++++++++++++++++++++++++- 2 files changed, 196 insertions(+), 7 deletions(-) diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py index e88388f70..e8241e8c2 100644 --- a/moabb/evaluations/utils.py +++ b/moabb/evaluations/utils.py @@ -50,6 +50,26 @@ def _check_if_is_pytorch_model(model): return False +def _check_if_is_pytorch_steps(model): + skorch_valid = False + try: + skorch_valid = any( + _check_if_is_pytorch_model(j) for j in model.named_steps.values() + ) + return skorch_valid + except Exception: + return skorch_valid + + +def _check_if_is_keras_steps(model): + keras_valid = False + try: + keras_valid = any(_check_if_is_keras_model(j) for j in model.named_steps.values()) + return keras_valid + except Exception: + return keras_valid + + def save_model_cv(model: object, save_path: str | Path, cv_index: str | int): """ Save a model fitted to a folder @@ -68,9 +88,12 @@ def save_model_cv(model: object, save_path: str | Path, cv_index: str | int): ------- """ - Path(save_path).mkdir(parents=True, exist_ok=True) + if save_path is None: + raise IOError("No path to save the model") + else: + Path(save_path).mkdir(parents=True, exist_ok=True) - if any(_check_if_is_pytorch_model(j) for j in model.named_steps.values()): + if _check_if_is_pytorch_steps(model): for step_name in model.named_steps: step = model.named_steps[step_name] file_step = f"{step_name}_fitted_{cv_index}" @@ -86,7 +109,7 @@ def save_model_cv(model: object, save_path: str | Path, cv_index: str | int): with open((Path(save_path) / f"{file_step}.pkl"), "wb") as file: dump(step, file, protocol=HIGHEST_PROTOCOL) - elif any(_check_if_is_keras_model(j) for j in model.named_steps.values()): + elif _check_if_is_keras_steps(model): for step_name 
in model.named_steps: file_step = f"{step_name}_fitted_model_{cv_index}" step = model.named_steps[step_name] @@ -134,7 +157,7 @@ def save_model_list(model_list: list | Pipeline, score_list: Sequence, save_path def create_save_path( hdf5_path, code: str, - subject: int, + subject: int | str, session: str, name: str, grid=False, diff --git a/moabb/tests/evaluations.py b/moabb/tests/evaluations.py index 65157f4b4..22411e9ad 100644 --- a/moabb/tests/evaluations.py +++ b/moabb/tests/evaluations.py @@ -11,8 +11,9 @@ from pyriemann.estimation import Covariances from pyriemann.spatialfilters import CSP from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA +from sklearn.dummy import DummyClassifier as Dummy from sklearn.model_selection import GridSearchCV -from sklearn.pipeline import make_pipeline +from sklearn.pipeline import Pipeline, make_pipeline from moabb.analysis.results import get_string_rep from moabb.datasets.fake import FakeDataset @@ -278,16 +279,18 @@ def test_compatible_dataset(self): class UtilEvaluation(unittest.TestCase): def test_save_model_cv(self): - model = DummyClassifier() + model = Dummy() save_path = "test_save_path" cv_index = 0 + save_model_cv(model, save_path, cv_index) # Assert that the saved model file exists self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))) def test_save_model_list(self): - model = DummyClassifier() + step = Dummy() + model = Pipeline([("step", step)]) model_list = [model] score_list = [0.8] save_path = "test_save_path" @@ -326,6 +329,169 @@ def test_create_save_path(self): ) self.assertEqual(grid_save_path, expected_grid_path) + def test_save_model_cv_with_pytorch_model(self): + try: + import torch + from skorch import NeuralNetClassifier + except ImportError: + self.skipTest("skorch library not available") + + step = NeuralNetClassifier(module=torch.nn.Linear(10, 2)) + step.initialize() + model = Pipeline([("step", step)]) + save_path = "." 
+ cv_index = 0 + save_model_cv(model, save_path, cv_index) + + # Assert that the saved model files exist + self.assertTrue( + os.path.isfile(os.path.join(save_path, "step_fitted_0_model.pkl")) + ) + self.assertTrue( + os.path.isfile(os.path.join(save_path, "step_fitted_0_optim.pkl")) + ) + self.assertTrue( + os.path.isfile(os.path.join(save_path, "step_fitted_0_history.json")) + ) + self.assertTrue( + os.path.isfile(os.path.join(save_path, "step_fitted_0_criterion.pkl")) + ) + + def test_save_model_list_with_multiple_models(self): + model1 = Dummy() + model2 = Dummy() + model_list = [model1, model2] + score_list = [0.8, 0.9] + save_path = "test_save_path" + save_model_list(model_list, score_list, save_path) + + # Assert that the saved model files for each model exist + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))) + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_1.pkl"))) + + # Assert that the saved model file for the best model exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))) + + def test_create_save_path_with_cross_session_evaluation(self): + hdf5_path = "base_path" + code = "evaluation_code" + subject = 1 + session = "session_0" + name = "evaluation_name" + eval_type = "CrossSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + expected_path = os.path.join( + hdf5_path, "Models_CrossSession", code, "1", "evaluation_name" + ) + self.assertEqual(save_path, expected_path) + + grid_save_path = create_save_path( + hdf5_path, code, subject, session, name, grid=True, eval_type=eval_type + ) + + expected_grid_path = os.path.join( + hdf5_path, "GridSearch_CrossSession", code, "1", "evaluation_name" + ) + self.assertEqual(grid_save_path, expected_grid_path) + + def test_create_save_path_without_hdf5_path(self): + hdf5_path = None + code = "evaluation_code" + subject = 1 + session = "session_0" + name = "evaluation_name" 
+ eval_type = "WithinSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + self.assertIsNone(save_path) + + def test_save_model_cv_without_hdf5_path(self): + model = DummyClassifier(kernel="rbf") + save_path = None + cv_index = 0 + + # Assert that calling save_model_cv without a save_path does raise an IOError + with self.assertRaises(IOError): + save_model_cv(model, save_path, cv_index) + + def test_save_model_list_with_single_model(self): + model = Dummy() + model_list = model + score_list = [0.8] + save_path = "test_save_path" + save_model_list(model_list, score_list, save_path) + + # Assert that the saved model file for the single model exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))) + + # Assert that the saved model file for the best model exists + self.assertTrue(os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))) + + def test_create_save_path_with_cross_subject_evaluation(self): + hdf5_path = "base_path" + code = "evaluation_code" + subject = "1" + session = "" + name = "evaluation_name" + eval_type = "CrossSubject" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + expected_path = os.path.join( + hdf5_path, "Models_CrossSubject", code, "1", "evaluation_name" + ) + self.assertEqual(save_path, expected_path) + + grid_save_path = create_save_path( + hdf5_path, code, subject, session, name, grid=True, eval_type=eval_type + ) + + expected_grid_path = os.path.join( + hdf5_path, "GridSearch_CrossSubject", code, "1", "evaluation_name" + ) + self.assertEqual(grid_save_path, expected_grid_path) + + def test_create_save_path_without_hdf5_path_or_session(self): + hdf5_path = None + code = "evaluation_code" + subject = 1 + session = "" + name = "evaluation_name" + eval_type = "WithinSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + 
self.assertIsNone(save_path) + + grid_save_path = create_save_path( + hdf5_path, code, subject, session, name, grid=True, eval_type=eval_type + ) + + self.assertIsNone(grid_save_path) + + def test_create_save_path_with_special_characters(self): + hdf5_path = "base_path" + code = "evaluation_code" + subject = 1 + session = "session_0" + name = "evalu@tion#name" + eval_type = "WithinSession" + save_path = create_save_path( + hdf5_path, code, subject, session, name, eval_type=eval_type + ) + + expected_path = os.path.join( + hdf5_path, "Models_WithinSession", code, "1", "session_0", "evalu@tion#name" + ) + self.assertEqual(save_path, expected_path) + if __name__ == "__main__": unittest.main() From 89df3ee888b8ab052d09391eca12523055f0977e Mon Sep 17 00:00:00 2001 From: bruAristimunha Date: Thu, 22 Jun 2023 21:15:31 +0200 Subject: [PATCH 30/30] Updating whats new file --- docs/source/whats_new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index f7db69975..ce0a56e94 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -35,6 +35,7 @@ Bugs - Restore 3 subject from Cho2017 (:gh:`392` by `Igor Carrara`_ and `Sylvain Chevallier`_) - Correct downloading with VirtualReality BrainInvaders dataset (:gh:`393` by `Gregoire Cattan`_) - Rename event `substraction` to `subtraction` in :func:`moabb.dataset.Shin2017B` (:gh:`397` by `Pierre Guetschel`_) +- Fixing issue with parallel evaluation (:gh:`401` by `Bruno Aristimunha`_ and `Igor Carrara`_) API changes ~~~~~~~~~~~