Skip to content

Commit

Permalink
Fix dataset downloading errors (#318)
Browse files Browse the repository at this point in the history
* fix Huebner zip error

* add whats new

* use pathlib and fix issue in script

* fix issue #275

* switch to pathlib and correct Windows path issue #300

Co-authored-by: Sylvain Chevallier <[email protected]>
  • Loading branch information
sylvchev and Sylvain Chevallier authored Jan 4, 2023
1 parent 6fb4795 commit 4a418aa
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 129 deletions.
1 change: 1 addition & 0 deletions docs/source/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Bugs
- Incorrect event assignation for Lee2019 in MNE >= 1.0.0 (:gh:`298` by `Sylvain Chevallier`_)
- Correct usage of name simplification function in analyze (:gh:`306` by `Divyesh Narayanan`_)
- Fix downloading path issue for Weibo2014 and Zhou2016, numpy error in DemonsP300 (:gh:`315` by `Sylvain Chevallier`_)
- Fix unzip error for Huebner2017 and Huebner2018 (:gh:`318` by `Sylvain Chevallier`_)

API changes
~~~~~~~~~~~
Expand Down
3 changes: 3 additions & 0 deletions moabb/datasets/bnci.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ def _load_data_009_2014(
event_id = {}
for run in data:
raw, ev = _convert_run_p300_sl(run, verbose=verbose)
# Raw EEG data are scaled by a factor 10.
# See https://github.com/NeuroTechX/moabb/issues/275
raw._data[:16, :] /= 10.0
sess.append(raw)
event_id.update(ev)

Expand Down
34 changes: 19 additions & 15 deletions moabb/datasets/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import os
import os.path as osp
from pathlib import Path

import requests
from mne import get_config, set_config
Expand Down Expand Up @@ -39,17 +40,17 @@ def get_dataset_path(sign, path):
key = "MNE_DATASETS_{:s}_PATH".format(sign)
if get_config(key) is None:
if get_config("MNE_DATA") is None:
path_def = osp.join(osp.expanduser("~"), "mne_data")
path_def = Path.home() / "mne_data"
print(
"MNE_DATA is not already configured. It will be set to "
"default location in the home directory - "
+ path_def
+ str(path_def)
+ "\nAll datasets will be downloaded to this location, if anything is "
"already downloaded, please move manually to this location"
)
if not osp.isdir(path_def):
os.makedirs(path_def)
set_config("MNE_DATA", osp.join(osp.expanduser("~"), "mne_data"))
if not path_def.is_dir():
path_def.mkdir(parents=True)
set_config("MNE_DATA", str(Path.home() / "mne_data"))
set_config(key, get_config("MNE_DATA"))
return _get_path(path, key, sign)

Expand Down Expand Up @@ -132,24 +133,27 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None):
Local path to the given data file. This path is contained inside a list
of length one, for compatibility.
"""
path = get_dataset_path(sign, path)
path = Path(get_dataset_path(sign, path))
key_dest = "MNE-{:s}-data".format(sign.lower())
destination = _url_to_local_path(url, osp.join(path, key_dest))
destination = _url_to_local_path(url, path / key_dest)
destination = str(path) + destination.split(str(path))[1]
table = {ord(c): "-" for c in ':*?"<>|'}
destination = Path(str(path) + destination.split(str(path))[1].translate(table))

# Fetch the file
if not osp.isfile(destination) or force_update:
if osp.isfile(destination):
os.remove(destination)
if not osp.isdir(osp.dirname(destination)):
os.makedirs(osp.dirname(destination))
if not destination.is_file() or force_update:
if destination.is_file():
destination.unlink()
if not destination.parent.is_dir():
destination.parent.mkdir(parents=True)
known_hash = None
else:
known_hash = file_hash(destination)
known_hash = file_hash(str(destination))
dlpath = retrieve(
url,
known_hash,
fname=osp.basename(url),
path=osp.dirname(destination),
fname=Path(url).name,
path=str(destination.parent),
progressbar=True,
)
return dlpath
Expand Down
38 changes: 14 additions & 24 deletions moabb/datasets/huebner_llp.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import glob
import os
import re
import zipfile
from abc import ABC
from pathlib import Path

import mne
import numpy as np
Expand Down Expand Up @@ -40,15 +39,16 @@ def __init__(

@staticmethod
def _filename_trial_info_extraction(vhdr_file_path):
vhdr_file_name = os.path.basename(vhdr_file_path)
vhdr_file_path = Path(vhdr_file_path)
vhdr_file_name = vhdr_file_path.name
run_file_pattern = "^matrixSpeller_Block([0-9]+)_Run([0-9]+)\\.vhdr$"
vhdr_file_patter_match = re.match(run_file_pattern, vhdr_file_name)

if not vhdr_file_patter_match:
# TODO: raise a wild exception?
print(vhdr_file_path)

session_name = os.path.basename(os.path.dirname(vhdr_file_path))
session_name = vhdr_file_path.parent.name
block_idx = vhdr_file_patter_match.group(1)
run_idx = vhdr_file_patter_match.group(2)
return session_name, block_idx, run_idx
Expand Down Expand Up @@ -84,29 +84,19 @@ def data_path(
self, subject, path=None, force_update=False, update_path=None, verbose=None
):
url = f"{self._src_url}subject{subject:02d}.zip"
data_archive_path = dl.data_dl(url, "llp")
data_dir_extracted_path = os.path.dirname(data_archive_path)
# else:
# raise ValueError(f'URL or data path must be given but both are None.')

subject_dir_path = os.path.join(data_dir_extracted_path, f"subject{subject:02d}")

data_extracted = os.path.isdir(subject_dir_path)
if not data_extracted:
# print('unzip', path_to_data_archive) # TODO logging? check verbose
zipfile_path = glob.glob(
os.path.join(data_dir_extracted_path, data_archive_path, "*.zip")
)[0]
zipfile_path = Path(dl.data_dl(url, "llp"))
zipfile_extracted_path = zipfile_path.parent

subject_dir_path = zipfile_extracted_path / f"subject{subject:02d}"

if not subject_dir_path.is_dir():
_BaseVisualMatrixSpellerDataset._extract_data(
data_dir_extracted_path, zipfile_path
zipfile_extracted_path, zipfile_path
)

run_glob_pattern = os.path.join(
data_dir_extracted_path,
f"subject{subject:02d}",
"matrixSpeller_Block*_Run*.vhdr",
)
subject_paths = glob.glob(run_glob_pattern)
subject_paths = zipfile_extracted_path / f"subject{subject:02d}"
subject_paths = subject_paths.glob("matrixSpeller_Block*_Run*.vhdr")
subject_paths = [str(p) for p in subject_paths]
return sorted(subject_paths)

@staticmethod
Expand Down
Loading

0 comments on commit 4a418aa

Please sign in to comment.