Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add API to download likelihood patchset archives #1046

Merged
merged 49 commits into from
Oct 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
a2123dd
Add download command
matthewfeickert Aug 22, 2020
c193320
Add exception for invalid host
matthewfeickert Aug 22, 2020
f2c8751
Verify archive host is valid
matthewfeickert Aug 22, 2020
92ca11f
Add tests for download
matthewfeickert Aug 22, 2020
29455e9
Add raises to docstring
matthewfeickert Aug 22, 2020
beaa9d5
Add all the flags
matthewfeickert Aug 22, 2020
ff8e8b1
Make pyflakes happy
matthewfeickert Aug 22, 2020
70f6367
Print output to screen for verbose tests
matthewfeickert Aug 22, 2020
f9570a8
Add comment on motivation for wait
matthewfeickert Aug 22, 2020
0588447
Visually separate logic
matthewfeickert Aug 22, 2020
ec77de8
Use Python 3.6 compliant code
matthewfeickert Aug 22, 2020
ffe4d33
Make download be pure-Python
matthewfeickert Aug 22, 2020
bb99c9b
Use term 'host' for consistency
matthewfeickert Aug 22, 2020
001bacc
Check explicitly for failure state
matthewfeickert Aug 22, 2020
1645b51
Use only Python stdlib for CLI
matthewfeickert Aug 22, 2020
1eb04f9
Attempt to deal with Python 3.6 having different warnings
matthewfeickert Aug 22, 2020
bc03e95
Migrate from patchset to contrib
matthewfeickert Sep 11, 2020
4d62266
Add example to docstring
matthewfeickert Sep 11, 2020
f21a69e
Fix docstrings
matthewfeickert Sep 11, 2020
909fdac
Use 'pyhf contrib download' API to reinforce contrib nature
matthewfeickert Sep 11, 2020
ea23305
Use urllib.parse.urlparse
matthewfeickert Sep 12, 2020
f77156c
Use requests for opening archives
matthewfeickert Sep 12, 2020
c882c24
Add note that contrib extra is required
matthewfeickert Sep 12, 2020
36a8dd0
Add www to match pattern in tests
matthewfeickert Sep 12, 2020
2f8d1b3
Make requests a true dependency
matthewfeickert Sep 12, 2020
7f3bb72
Try just the minimal for an optional dependency
matthewfeickert Sep 12, 2020
d9a3dbb
Make note of contrib more clear
matthewfeickert Sep 12, 2020
47b7ad1
Move import inside of download function
matthewfeickert Sep 13, 2020
75e1889
Add test for missing requests module
matthewfeickert Sep 13, 2020
6e8ccd7
Guard contrib functions beyond cli in try except
matthewfeickert Sep 14, 2020
3797d44
Update tests for guarded pyhf contrib download
matthewfeickert Sep 14, 2020
7e23fa9
Add Contrib to Python API docs
matthewfeickert Sep 17, 2020
8bddb71
Correct mislabeling of analysis
matthewfeickert Sep 17, 2020
3fcbcba
Add Python API to contrib download
matthewfeickert Sep 17, 2020
3717003
Wrap Python API in CLI API
matthewfeickert Sep 17, 2020
83bd2ef
Allow for POSIX tar archives that are not gzip
matthewfeickert Sep 17, 2020
d04564c
Sort to avoid doctest error
matthewfeickert Sep 19, 2020
ef71fca
Fixup of test_scripts
matthewfeickert Sep 19, 2020
506e59e
Use del of modules to force state
matthewfeickert Oct 1, 2020
0ca56bc
Fix typo
matthewfeickert Oct 1, 2020
39b8feb
Use mock after watching Anthony Sottile's video
matthewfeickert Oct 5, 2020
8fc005a
Revert "Allow for POSIX tar archives that are not gzip"
matthewfeickert Oct 5, 2020
0af5ba4
Add TODO for removal of Python 3.6
matthewfeickert Oct 5, 2020
560fe64
Add note
matthewfeickert Oct 5, 2020
2afb313
Use log.error instead of print
matthewfeickert Oct 6, 2020
0e05559
Make pyflakes happy
matthewfeickert Oct 6, 2020
8178fa3
ERROR not INFO
matthewfeickert Oct 6, 2020
d8e2761
Remove unneeded variable
matthewfeickert Oct 6, 2020
b72e399
Don't rely on newlines being in output given systems
matthewfeickert Oct 6, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,14 @@ Utilities
validate
options_from_eqdelimstring
digest

Contrib
-------

.. currentmodule:: pyhf.contrib

.. autosummary::
:toctree: _generated/

viz.brazil
utils.download
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
+ extras_require['minuit']
)
)
extras_require['contrib'] = sorted(set(['matplotlib']))
extras_require['contrib'] = sorted(set(['matplotlib', 'requests']))
extras_require['lint'] = sorted(set(['pyflakes', 'black']))

extras_require['test'] = sorted(
Expand Down
3 changes: 2 additions & 1 deletion src/pyhf/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
from .spec import cli as spec
from .infer import cli as infer
from .complete import cli as complete
from .contrib import cli as contrib

__all__ = ['cli', 'rootio', 'spec', 'infer', 'complete']
__all__ = ['cli', 'rootio', 'spec', 'infer', 'complete', 'contrib']
4 changes: 3 additions & 1 deletion src/pyhf/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import click

from ..version import __version__
from . import rootio, spec, infer, patchset, complete
from . import rootio, spec, infer, patchset, complete, contrib

logging.basicConfig()
log = logging.getLogger(__name__)
Expand Down Expand Up @@ -35,3 +35,5 @@ def pyhf():
pyhf.add_command(patchset.cli)

pyhf.add_command(complete.cli)

pyhf.add_command(contrib.cli)
71 changes: 71 additions & 0 deletions src/pyhf/cli/contrib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""CLI for functionality that will get migrated out eventually."""
import logging
import click
from pathlib import Path

from ..contrib import utils

logging.basicConfig()
log = logging.getLogger(__name__)


# Click group that gathers the contrib subcommands. It is attached to the
# top-level ``pyhf`` command with ``pyhf.add_command(contrib.cli)``.
@click.group(name="contrib")
def cli():
    """
    Contrib experimental operations.

    .. note::

        Requires installation of the ``contrib`` extra.

        .. code-block:: shell

            $ python -m pip install pyhf[contrib]
    """


# CLI wrapper around ``pyhf.contrib.utils.download``.
#
# Arguments (all forwarded to the Python API except ``verbose``):
#   archive_url:      URL of the patchset archive to download.
#   output_directory: directory to unpack into (or the file to write when
#                     ``--compress`` is given).
#   verbose:          print the entries found under ``output_directory``.
#   force:            skip the approved-host check.
#   compress:         keep the archive as a ``tar.gz`` instead of unpacking.
@cli.command()
@click.argument("archive-url", default="-")
@click.argument("output-directory", default="-")
@click.option("-v", "--verbose", is_flag=True, help="Enables verbose mode")
@click.option(
    "-f", "--force", is_flag=True, help="Force download from non-approved host"
)
@click.option(
    "-c",
    "--compress",
    is_flag=True,
    help="Keep the archive in a compressed tar.gz form",
)
def download(archive_url, output_directory, verbose, force, compress):
    """
    Download the patchset archive from the remote URL and extract it in a
    directory at the path given.

    Example:

    .. code-block:: shell

        $ pyhf contrib download --verbose https://www.hepdata.net/record/resource/1408476?view=true 1Lbb-likelihoods

        \b
        1Lbb-likelihoods/patchset.json
        1Lbb-likelihoods/README.md
        1Lbb-likelihoods/BkgOnly.json

    Raises:
        :class:`~pyhf.exceptions.InvalidArchiveHost`: if the provided archive host name is not known to be valid
    """
    try:
        # ``utils.download`` is only defined when the optional ``requests``
        # dependency could be imported. Resolve it on its own so that an
        # AttributeError raised *during* the download is not mistaken for a
        # missing contrib extra.
        download_archive = utils.download
    except AttributeError as excep:
        exception_info = (
            str(excep)
            + "\nInstallation of the contrib extra is required to use the contrib CLI API"
            + "\nPlease install with: python -m pip install pyhf[contrib]\n"
        )
        # Log and return normally: the command still exits with code 0 here.
        log.error(exception_info)
        return

    download_archive(archive_url, output_directory, force, compress)

    if verbose:
        file_list = [str(file) for file in Path(output_directory).glob("*")]
        print("\n".join(file_list))
66 changes: 66 additions & 0 deletions src/pyhf/contrib/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Helper utilities for common tasks."""

from urllib.parse import urlparse
import tarfile
from io import BytesIO
import logging
from .. import exceptions

logging.basicConfig()
log = logging.getLogger(__name__)

# ``download`` is only defined when the optional ``requests`` dependency is
# importable; otherwise an error is logged at import time and the name is left
# undefined.
try:
    import requests

    def download(archive_url, output_directory, force=False, compress=False):
        """
        Download the patchset archive from the remote URL and extract it in a
        directory at the path given.

        Example:

            >>> from pyhf.contrib.utils import download
            >>> download("https://www.hepdata.net/record/resource/1408476?view=true", "1Lbb-likelihoods")
            >>> import os
            >>> sorted(os.listdir("1Lbb-likelihoods"))
            ['BkgOnly.json', 'README.md', 'patchset.json']
            >>> download("https://www.hepdata.net/record/resource/1408476?view=true", "1Lbb-likelihoods.tar.gz", compress=True)
            >>> import glob
            >>> glob.glob("1Lbb-likelihoods.tar.gz")
            ['1Lbb-likelihoods.tar.gz']

        Args:
            archive_url (`str`): The URL of the :class:`~pyhf.patchset.PatchSet` archive to download.
            output_directory (`str`): Name of the directory to unpack the archive into.
            force (`Bool`): Force download from non-approved host. Default is ``False``.
            compress (`Bool`): Keep the archive in a compressed ``tar.gz`` form. Default is ``False``.

        Raises:
            :class:`~pyhf.exceptions.InvalidArchiveHost`: if the provided archive host name is not known to be valid
            :class:`requests.exceptions.HTTPError`: if the host answers with an HTTP error status
        """
        if not force:
            valid_hosts = ["www.hepdata.net", "doi.org"]
            netloc = urlparse(archive_url).netloc
            if netloc not in valid_hosts:
                raise exceptions.InvalidArchiveHost(
                    f"{netloc} is not an approved archive host: {', '.join(str(host) for host in valid_hosts)}\n"
                    + "To download an archive from this host use the --force option."
                )

        with requests.get(archive_url) as response:
            # Stop on 4xx/5xx responses instead of saving or unpacking the
            # body of an error page as if it were the archive.
            response.raise_for_status()

            if compress:
                # Here ``output_directory`` is used as the path of the file
                # that receives the raw archive bytes.
                with open(output_directory, "wb") as archive:
                    archive.write(response.content)
            else:
                # NOTE(review): extractall() trusts the member paths stored in
                # the archive. With ``force=True`` the archive may come from
                # an arbitrary host -- consider validating members first.
                with tarfile.open(
                    mode="r|gz", fileobj=BytesIO(response.content)
                ) as archive:
                    archive.extractall(output_directory)


except ModuleNotFoundError as excep:
    log.error(
        str(excep)
        + "\nInstallation of the contrib extra is required to use pyhf.contrib.utils.download"
        + "\nPlease install with: python -m pip install pyhf[contrib]\n"
    )
4 changes: 4 additions & 0 deletions src/pyhf/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def __init__(self, ValidationError):
super().__init__(message)


class InvalidArchiveHost(Exception):
    """InvalidArchiveHost is raised when the host of a given patchset archive URL is not an approved archive host."""


class InvalidPatchSet(Exception):
"""InvalidPatchSet is raised when a given patchset object does not have the right configuration, even though it validates correctly against the schema."""

Expand Down
100 changes: 100 additions & 0 deletions tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@
import shlex
import pyhf
import time
import sys
import logging
import pytest
from click.testing import CliRunner
from unittest import mock
from importlib import reload
from importlib import import_module


def test_version(script_runner):
Expand Down Expand Up @@ -561,6 +567,100 @@ def test_workspace_digest(tmpdir, script_runner, algorithms, do_json):
}


@pytest.mark.parametrize(
    "archive",
    [
        "https://www.hepdata.net/record/resource/1408476?view=true",
        "https://doi.org/10.17182/hepdata.89408.v1/r2",
    ],
)
def test_patchset_download(datadir, script_runner, archive):
    """Exercise ``pyhf contrib download`` for approved and unapproved hosts."""
    command = f'pyhf contrib download {archive} {datadir.join("likelihoods").strpath}'
    ret = script_runner.run(*shlex.split(command))
    assert ret.success

    # Run with all optional flags
    command = f'pyhf contrib download --verbose --force {archive} {datadir.join("likelihoods").strpath}'
    ret = script_runner.run(*shlex.split(command))
    assert ret.success

    # An unapproved host is rejected unless --force is given
    command = f'pyhf contrib download --verbose https://www.fail.org/record/resource/1234567 {datadir.join("likelihoods").strpath}'
    ret = script_runner.run(*shlex.split(command))
    assert not ret.success
    assert (
        "pyhf.exceptions.InvalidArchiveHost: www.fail.org is not an approved archive host"
        in ret.stderr
    )

    # With --force the host check is skipped and the request itself fails
    command = f'pyhf contrib download --verbose --force https://www.fail.org/record/resource/1234567 {datadir.join("likelihoods").strpath}'
    ret = script_runner.run(*shlex.split(command))
    assert not ret.success
    # TODO: https://github.com/scikit-hep/pyhf/issues/1075
    # Python 3.6 has different return error than 3.7, 3.8
    # Each message is tested against stderr separately: the previous
    # ``"a" or "b" in ret.stderr`` form was always truthy.
    assert any(
        message in ret.stderr
        for message in (
            "ssl.CertificateError: hostname 'www.fail.org' doesn't match",
            "certificate verify failed: Hostname mismatch, certificate is not valid for 'www.fail.org'.",
        )
    )


def test_missing_contrib_extra(caplog):
    """Importing the contrib utilities without ``requests`` logs an install hint."""
    # Enter at_level *before* the import so the capture level is in force when
    # the ERROR record is emitted; entering it afterwards has no effect.
    with caplog.at_level(logging.ERROR):
        with mock.patch.dict(sys.modules):
            # A ``None`` entry makes ``import requests`` fail
            sys.modules["requests"] = None
            if "pyhf.contrib.utils" in sys.modules:
                reload(sys.modules["pyhf.contrib.utils"])
            else:
                import_module("pyhf.cli")

    for line in [
        "import of requests halted; None in sys.modules",
        "Installation of the contrib extra is required to use pyhf.contrib.utils.download",
        "Please install with: python -m pip install pyhf[contrib]",
    ]:
        assert line in caplog.text
    caplog.clear()


def test_missing_contrib_download(caplog):
    """The CLI logs an install hint and exits with 0 when ``requests`` is missing."""
    # Enter at_level *before* the command runs so the capture level is in
    # force when the ERROR record is emitted; entering it afterwards has no
    # effect.
    with caplog.at_level(logging.ERROR):
        with mock.patch.dict(sys.modules):
            # A ``None`` entry makes ``import requests`` fail
            sys.modules["requests"] = None
            if "pyhf.cli" in sys.modules:
                reload(sys.modules["pyhf.cli"])
            else:
                import_module("pyhf.cli")

            # Force environment for runner
            for module in [
                "pyhf.cli.contrib",
                "pyhf.contrib",
                "pyhf.contrib.utils",
            ]:
                if module in sys.modules:
                    del sys.modules[module]

            from pyhf.cli.contrib import download

            runner = CliRunner(mix_stderr=False)
            result = runner.invoke(
                download,
                [
                    "--verbose",
                    "https://www.hepdata.net/record/resource/1408476?view=true",
                    "1Lbb-likelihoods",
                ],
            )
            assert result.exit_code == 0

    for line in [
        "module 'pyhf.contrib.utils' has no attribute 'download'",
        "Installation of the contrib extra is required to use the contrib CLI API",
        "Please install with: python -m pip install pyhf[contrib]",
    ]:
        assert line in caplog.text
    caplog.clear()


@pytest.mark.parametrize('output_file', [False, True])
@pytest.mark.parametrize('with_metadata', [False, True])
def test_patchset_extract(datadir, tmpdir, script_runner, output_file, with_metadata):
Expand Down