Skip to content

Commit

Permalink
add download of file if path of read-function is URL
Browse files Browse the repository at this point in the history
  • Loading branch information
thartl-diw committed Jan 10, 2025
1 parent 150e825 commit f1b8665
Showing 1 changed file with 29 additions and 3 deletions.
32 changes: 29 additions & 3 deletions opendataformat/read_odf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
import zipfile
import xml.etree.ElementTree as ET
import os


from tempfile import gettempdir
from urllib.request import urlretrieve
from urllib.parse import urlparse



Expand Down Expand Up @@ -67,9 +68,29 @@ def read_odf(path, languages = "all", usecols = None, skiprows=None, nrows=None,
if (not path.endswith(".zip") and not os.path.exists(path)) or (not path.endswith(".zip") and os.path.exists(path + ".zip")) :
path = path + ".zip"

if not os.path.exists(path):
if not os.path.exists(path) and not is_url(path):
raise FileNotFoundError(f"The file {path} was not found.")

# Download file to tempdir if path is URL
if is_url(path):
# Get the system's temporary directory
temp_dir = gettempdir()

fname = path.split("/")[-1]
# Define the full path where the file will be saved
file_path = os.path.join(temp_dir, fname)

# Download the file using urllib
try:
urlretrieve(path, file_path)
except Exception:
raise Exception("Downloading file from URL failed.")

path = file_path

if not os.path.exists(path):
FileNotFoundError(f"The file {path} was not found.")

if not path.endswith(".zip") and (not os.path.exists(path + "/data.csv") or not os.path.exists(path + "/metadata.xml")):
raise FileNotFoundError(f"A file {path + '.zip'} was not found and in the folder {path} expected metadata.xml and data.csv.")

Expand Down Expand Up @@ -310,3 +331,8 @@ def make_variables_dic(root, variables):
dictionaries[varname] = dictionary

return dictionaries

def is_url(path):
    """Report whether *path* looks like a URL rather than a local path.

    The string is split with ``urlparse`` and counts as a URL only when
    both a scheme (such as "http" or "https") and a network location
    are present.

    Parameters
    ----------
    path : str
        The location string to inspect.

    Returns
    -------
    bool
        True if both the scheme and the netloc components are non-empty,
        otherwise False.
    """
    components = urlparse(path)
    has_scheme = bool(components.scheme)
    has_netloc = bool(components.netloc)
    # Requiring a netloc as well keeps strings such as "C:/data/x.zip"
    # (which parse with a one-letter scheme) from being treated as URLs.
    return has_scheme and has_netloc

0 comments on commit f1b8665

Please sign in to comment.