Skip to content

Commit

Permalink
Fix dataset downloading errors (#318)
Browse files Browse the repository at this point in the history
* fix Huebner zip error

* add whats new

* use pathlib and fix issue in script

* fix issue #275

* switch to pathlib and correct Windows path issue #300

Co-authored-by: Sylvain Chevallier <[email protected]>
  • Loading branch information
sylvchev and Sylvain Chevallier authored Jan 4, 2023
1 parent 6fb4795 commit 4a418aa
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 129 deletions.
1 change: 1 addition & 0 deletions docs/source/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Bugs
- Incorrect event assignation for Lee2019 in MNE >= 1.0.0 (:gh:`298` by `Sylvain Chevallier`_)
- Correct usage of name simplification function in analyze (:gh:`306` by `Divyesh Narayanan`_)
- Fix downloading path issue for Weibo2014 and Zhou2016, numpy error in DemonsP300 (:gh:`315` by `Sylvain Chevallier`_)
- Fix unzip error for Huebner2017 and Huebner2018 (:gh:`318` by `Sylvain Chevallier`_)

API changes
~~~~~~~~~~~
Expand Down
3 changes: 3 additions & 0 deletions moabb/datasets/bnci.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ def _load_data_009_2014(
event_id = {}
for run in data:
raw, ev = _convert_run_p300_sl(run, verbose=verbose)
# Raw EEG data are scaled by a factor 10.
# See https://github.com/NeuroTechX/moabb/issues/275
raw._data[:16, :] /= 10.0
sess.append(raw)
event_id.update(ev)

Expand Down
34 changes: 19 additions & 15 deletions moabb/datasets/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import os
import os.path as osp
from pathlib import Path

import requests
from mne import get_config, set_config
Expand Down Expand Up @@ -39,17 +40,17 @@ def get_dataset_path(sign, path):
key = "MNE_DATASETS_{:s}_PATH".format(sign)
if get_config(key) is None:
if get_config("MNE_DATA") is None:
path_def = osp.join(osp.expanduser("~"), "mne_data")
path_def = Path.home() / "mne_data"
print(
"MNE_DATA is not already configured. It will be set to "
"default location in the home directory - "
+ path_def
+ str(path_def)
+ "\nAll datasets will be downloaded to this location, if anything is "
"already downloaded, please move manually to this location"
)
if not osp.isdir(path_def):
os.makedirs(path_def)
set_config("MNE_DATA", osp.join(osp.expanduser("~"), "mne_data"))
if not path_def.is_dir():
path_def.mkdir(parents=True)
set_config("MNE_DATA", str(Path.home() / "mne_data"))
set_config(key, get_config("MNE_DATA"))
return _get_path(path, key, sign)

Expand Down Expand Up @@ -132,24 +133,27 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None):
Local path to the given data file. This path is contained inside a list
of length one, for compatibility.
"""
path = get_dataset_path(sign, path)
path = Path(get_dataset_path(sign, path))
key_dest = "MNE-{:s}-data".format(sign.lower())
destination = _url_to_local_path(url, osp.join(path, key_dest))
destination = _url_to_local_path(url, path / key_dest)
destination = str(path) + destination.split(str(path))[1]
table = {ord(c): "-" for c in ':*?"<>|'}
destination = Path(str(path) + destination.split(str(path))[1].translate(table))

# Fetch the file
if not osp.isfile(destination) or force_update:
if osp.isfile(destination):
os.remove(destination)
if not osp.isdir(osp.dirname(destination)):
os.makedirs(osp.dirname(destination))
if not destination.is_file() or force_update:
if destination.is_file():
destination.unlink()
if not destination.parent.is_dir():
destination.parent.mkdir(parents=True)
known_hash = None
else:
known_hash = file_hash(destination)
known_hash = file_hash(str(destination))
dlpath = retrieve(
url,
known_hash,
fname=osp.basename(url),
path=osp.dirname(destination),
fname=Path(url).name,
path=str(destination.parent),
progressbar=True,
)
return dlpath
Expand Down
38 changes: 14 additions & 24 deletions moabb/datasets/huebner_llp.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import glob
import os
import re
import zipfile
from abc import ABC
from pathlib import Path

import mne
import numpy as np
Expand Down Expand Up @@ -40,15 +39,16 @@ def __init__(

@staticmethod
def _filename_trial_info_extraction(vhdr_file_path):
vhdr_file_name = os.path.basename(vhdr_file_path)
vhdr_file_path = Path(vhdr_file_path)
vhdr_file_name = vhdr_file_path.name
run_file_pattern = "^matrixSpeller_Block([0-9]+)_Run([0-9]+)\\.vhdr$"
vhdr_file_patter_match = re.match(run_file_pattern, vhdr_file_name)

if not vhdr_file_patter_match:
# TODO: raise a wild exception?
print(vhdr_file_path)

session_name = os.path.basename(os.path.dirname(vhdr_file_path))
session_name = vhdr_file_path.parent.name
block_idx = vhdr_file_patter_match.group(1)
run_idx = vhdr_file_patter_match.group(2)
return session_name, block_idx, run_idx
Expand Down Expand Up @@ -84,29 +84,19 @@ def data_path(
self, subject, path=None, force_update=False, update_path=None, verbose=None
):
url = f"{self._src_url}subject{subject:02d}.zip"
data_archive_path = dl.data_dl(url, "llp")
data_dir_extracted_path = os.path.dirname(data_archive_path)
# else:
# raise ValueError(f'URL or data path must be given but both are None.')

subject_dir_path = os.path.join(data_dir_extracted_path, f"subject{subject:02d}")

data_extracted = os.path.isdir(subject_dir_path)
if not data_extracted:
# print('unzip', path_to_data_archive) # TODO logging? check verbose
zipfile_path = glob.glob(
os.path.join(data_dir_extracted_path, data_archive_path, "*.zip")
)[0]
zipfile_path = Path(dl.data_dl(url, "llp"))
zipfile_extracted_path = zipfile_path.parent

subject_dir_path = zipfile_extracted_path / f"subject{subject:02d}"

if not subject_dir_path.is_dir():
_BaseVisualMatrixSpellerDataset._extract_data(
data_dir_extracted_path, zipfile_path
zipfile_extracted_path, zipfile_path
)

run_glob_pattern = os.path.join(
data_dir_extracted_path,
f"subject{subject:02d}",
"matrixSpeller_Block*_Run*.vhdr",
)
subject_paths = glob.glob(run_glob_pattern)
subject_paths = zipfile_extracted_path / f"subject{subject:02d}"
subject_paths = subject_paths.glob("matrixSpeller_Block*_Run*.vhdr")
subject_paths = [str(p) for p in subject_paths]
return sorted(subject_paths)

@staticmethod
Expand Down
Loading

0 comments on commit 4a418aa

Please sign in to comment.