diff --git a/requirements.txt b/requirements.txt index fe720b6..e06a234 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +tqdm numpy scikit-learn pandas diff --git a/setup.cfg b/setup.cfg index 5d5f94d..055e6d4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,6 +23,7 @@ extend-ignore = # basic dependencies basic = + tqdm numpy scikit-learn pandas diff --git a/setup.py b/setup.py index 5e9741d..913ec2f 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ packages=find_packages(exclude=["tests"]), include_package_data=True, install_requires=[ + "tqdm", "numpy", "scikit-learn", "pandas", diff --git a/tsdb/utils/downloading.py b/tsdb/utils/downloading.py index 0f7a00a..451d84f 100644 --- a/tsdb/utils/downloading.py +++ b/tsdb/utils/downloading.py @@ -9,10 +9,12 @@ import os import shutil import tempfile -import urllib.request import warnings from typing import Optional +import requests +from tqdm import tqdm + from .logging import logger from ..database import DATABASE @@ -54,7 +56,27 @@ def _download_and_extract(url: str, saving_path: str) -> Optional[str]: # download and save the raw dataset try: - urllib.request.urlretrieve(url, raw_data_saving_path) + with requests.get(url, stream=True) as r: + r.raise_for_status() + chunk_size = 8192 + try: + size = int(r.headers["Content-Length"]) + except KeyError: + size = None + + with tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + desc=f"Downloading {file_name}", + total=size, + ) as pbar: + with open(raw_data_saving_path, "wb") as f: + for chunk in r.iter_content(chunk_size=chunk_size): + f.write(chunk) + pbar.update(len(chunk)) + except Exception as e: shutil.rmtree(saving_path, ignore_errors=True) shutil.rmtree(raw_data_saving_path, ignore_errors=True)