Fix dataset downloading errors #318

Merged · 5 commits · Jan 4, 2023
1 change: 1 addition & 0 deletions docs/source/whats_new.rst
@@ -30,6 +30,7 @@ Bugs
 - Incorrect event assignation for Lee2019 in MNE >= 1.0.0 (:gh:`298` by `Sylvain Chevallier`_)
 - Correct usage of name simplification function in analyze (:gh:`306` by `Divyesh Narayanan`_)
 - Fix downloading path issue for Weibo2014 and Zhou2016, numpy error in DemonsP300 (:gh:`315` by `Sylvain Chevallier`_)
+- Fix unzip error for Huebner2017 and Huebner2018 (:gh:`318` by `Sylvain Chevallier`_)
 
 API changes
 ~~~~~~~~~~~
3 changes: 3 additions & 0 deletions moabb/datasets/bnci.py
@@ -239,6 +239,9 @@ def _load_data_009_2014(
     event_id = {}
     for run in data:
         raw, ev = _convert_run_p300_sl(run, verbose=verbose)
+        # Raw EEG data are scaled by a factor 10.
+        # See https://github.com/NeuroTechX/moabb/issues/275
+        raw._data[:16, :] /= 10.0
         sess.append(raw)
         event_id.update(ev)

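The added lines rescale only the first 16 rows of `raw._data` (the EEG channels) in place, leaving any remaining channels untouched. A minimal NumPy sketch of the same operation, using a random array as a stand-in for the MNE `Raw` buffer:

```python
import numpy as np

# Stand-in for raw._data: 19 channels x 1000 samples, where the first
# 16 rows are EEG channels stored at 10x their true amplitude.
data = np.random.randn(19, 1000) * 10.0

# Undo the spurious factor-10 scaling on the EEG channels only.
data[:16, :] /= 10.0
```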
34 changes: 19 additions & 15 deletions moabb/datasets/download.py
@@ -5,6 +5,7 @@
 import json
 import os
 import os.path as osp
+from pathlib import Path
 
 import requests
 from mne import get_config, set_config
@@ -39,17 +40,17 @@ def get_dataset_path(sign, path):
key = "MNE_DATASETS_{:s}_PATH".format(sign)
if get_config(key) is None:
if get_config("MNE_DATA") is None:
path_def = osp.join(osp.expanduser("~"), "mne_data")
path_def = Path.home() / "mne_data"
print(
"MNE_DATA is not already configured. It will be set to "
"default location in the home directory - "
+ path_def
+ str(path_def)
+ "\nAll datasets will be downloaded to this location, if anything is "
"already downloaded, please move manually to this location"
)
if not osp.isdir(path_def):
os.makedirs(path_def)
set_config("MNE_DATA", osp.join(osp.expanduser("~"), "mne_data"))
if not path_def.is_dir():
path_def.mkdir(parents=True)
set_config("MNE_DATA", str(Path.home() / "mne_data"))
set_config(key, get_config("MNE_DATA"))
return _get_path(path, key, sign)
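This hunk swaps string-based `os.path` manipulation for `pathlib.Path` objects. A minimal sketch of the idioms the patch substitutes (not MOABB code, just the pathlib equivalents):

```python
from pathlib import Path

# pathlib equivalent of osp.join(osp.expanduser("~"), "mne_data")
path_def = Path.home() / "mne_data"

# pathlib equivalent of the osp.isdir(...) / os.makedirs(...) pair
if not path_def.is_dir():
    path_def.mkdir(parents=True)

# mne.set_config stores string values, hence the str(...) wrapping
# around Path objects throughout the patch.
print(str(path_def))
```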

@@ -132,24 +133,27 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None):
         Local path to the given data file. This path is contained inside a list
         of length one, for compatibility.
     """
-    path = get_dataset_path(sign, path)
+    path = Path(get_dataset_path(sign, path))
     key_dest = "MNE-{:s}-data".format(sign.lower())
-    destination = _url_to_local_path(url, osp.join(path, key_dest))
+    destination = _url_to_local_path(url, path / key_dest)
-    destination = str(path) + destination.split(str(path))[1]
+    table = {ord(c): "-" for c in ':*?"<>|'}
+    destination = Path(str(path) + destination.split(str(path))[1].translate(table))
 
     # Fetch the file
-    if not osp.isfile(destination) or force_update:
-        if osp.isfile(destination):
-            os.remove(destination)
-        if not osp.isdir(osp.dirname(destination)):
-            os.makedirs(osp.dirname(destination))
+    if not destination.is_file() or force_update:
+        if destination.is_file():
+            destination.unlink()
+        if not destination.parent.is_dir():
+            destination.parent.mkdir(parents=True)
         known_hash = None
     else:
-        known_hash = file_hash(destination)
+        known_hash = file_hash(str(destination))
     dlpath = retrieve(
         url,
         known_hash,
-        fname=osp.basename(url),
-        path=osp.dirname(destination),
+        fname=Path(url).name,
+        path=str(destination.parent),
         progressbar=True,
     )
    return dlpath
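The key fix in this hunk is the `translate` table: characters that are illegal in Windows file names (`: * ? " < > |`) are replaced with hyphens before the URL's tail is turned into a local path, which is what resolves the download failures. A standalone illustration of the mechanism (the remote path below is a hypothetical example, not a real dataset URL):

```python
# Map each Windows-illegal filename character to "-", as in the patch.
table = {ord(c): "-" for c in ':*?"<>|'}

# Hypothetical remote path segment containing a ":" that Windows rejects.
remote_segment = "/records/8486/files/subject01/matrix:data.zip"
print(remote_segment.translate(table))
# -> /records/8486/files/subject01/matrix-data.zip
```

Note that `/` is not in the table, so directory separators pass through unchanged; only the offending characters inside individual path components are rewritten.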
38 changes: 14 additions & 24 deletions moabb/datasets/huebner_llp.py
@@ -1,8 +1,7 @@
-import glob
-import os
 import re
 import zipfile
 from abc import ABC
+from pathlib import Path
 
 import mne
 import numpy as np
@@ -40,15 +39,16 @@ def __init__(

     @staticmethod
     def _filename_trial_info_extraction(vhdr_file_path):
-        vhdr_file_name = os.path.basename(vhdr_file_path)
+        vhdr_file_path = Path(vhdr_file_path)
+        vhdr_file_name = vhdr_file_path.name
         run_file_pattern = "^matrixSpeller_Block([0-9]+)_Run([0-9]+)\\.vhdr$"
         vhdr_file_patter_match = re.match(run_file_pattern, vhdr_file_name)
 
         if not vhdr_file_patter_match:
             # TODO: raise a wild exception?
             print(vhdr_file_path)
 
-        session_name = os.path.basename(os.path.dirname(vhdr_file_path))
+        session_name = vhdr_file_path.parent.name
         block_idx = vhdr_file_patter_match.group(1)
         run_idx = vhdr_file_patter_match.group(2)
         return session_name, block_idx, run_idx
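The refactored helper still derives the session name from the parent directory and the block/run indices from the regex groups; only the path handling moved to `pathlib`. A quick sketch of how those pieces come apart, on a made-up path following the dataset's naming scheme:

```python
import re
from pathlib import Path

# Hypothetical recording path; only the naming scheme matters here.
vhdr_file_path = Path("/tmp/mne_data/session1/matrixSpeller_Block2_Run3.vhdr")

pattern = "^matrixSpeller_Block([0-9]+)_Run([0-9]+)\\.vhdr$"
match = re.match(pattern, vhdr_file_path.name)

session_name = vhdr_file_path.parent.name  # "session1"
block_idx = match.group(1)                 # "2"
run_idx = match.group(2)                   # "3"
print(session_name, block_idx, run_idx)
```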
@@ -84,29 +84,19 @@ def data_path(
         self, subject, path=None, force_update=False, update_path=None, verbose=None
     ):
         url = f"{self._src_url}subject{subject:02d}.zip"
-        data_archive_path = dl.data_dl(url, "llp")
-        data_dir_extracted_path = os.path.dirname(data_archive_path)
-        # else:
-        #     raise ValueError(f'URL or data path must be given but both are None.')
-
-        subject_dir_path = os.path.join(data_dir_extracted_path, f"subject{subject:02d}")
-
-        data_extracted = os.path.isdir(subject_dir_path)
-        if not data_extracted:
-            # print('unzip', path_to_data_archive)  # TODO logging? check verbose
-            zipfile_path = glob.glob(
-                os.path.join(data_dir_extracted_path, data_archive_path, "*.zip")
-            )[0]
+        zipfile_path = Path(dl.data_dl(url, "llp"))
+        zipfile_extracted_path = zipfile_path.parent
+
+        subject_dir_path = zipfile_extracted_path / f"subject{subject:02d}"
+
+        if not subject_dir_path.is_dir():
             _BaseVisualMatrixSpellerDataset._extract_data(
-                data_dir_extracted_path, zipfile_path
+                zipfile_extracted_path, zipfile_path
             )
 
-        run_glob_pattern = os.path.join(
-            data_dir_extracted_path,
-            f"subject{subject:02d}",
-            "matrixSpeller_Block*_Run*.vhdr",
-        )
-        subject_paths = glob.glob(run_glob_pattern)
+        subject_paths = zipfile_extracted_path / f"subject{subject:02d}"
+        subject_paths = subject_paths.glob("matrixSpeller_Block*_Run*.vhdr")
+        subject_paths = [str(p) for p in subject_paths]
         return sorted(subject_paths)
 
     @staticmethod
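The new `data_path` also replaces `glob.glob` plus `os.path.join` with `Path.glob`. Since `Path.glob` yields `Path` objects lazily, the patch materializes them back into strings before sorting, so callers that expect `str` paths keep working. A sketch of the equivalence, under an assumed extraction directory:

```python
from pathlib import Path

# Hypothetical directory where subject01.zip was extracted.
extracted = Path("/tmp/mne_data/MNE-llp-data")

subject_dir = extracted / "subject01"
# Path.glob yields Path objects lazily; convert to str and sort
# for a stable, caller-friendly run list.
subject_paths = sorted(str(p) for p in subject_dir.glob("matrixSpeller_Block*_Run*.vhdr"))
print(subject_paths)
```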