-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Initial outline for the first fetching module * Breaks initial outline up into several sub-classes to better manage the differences in artifact type * Adds some in-progress documentation * Fleshes out fetch exceptions * Shores up documentation in the artifact base class * Finishes template for git Artifact Fetcher * Starts work, in earnest, on the HTTP artifact fetcher * First-pass implementation of the _extract() function * Implements get_archive_sha256() * Adds documentation * Fetcher exceptions are now under a dedicated exception module * Fixes current linting errors * Fixes static analyzer issues * The RecipeReader.get() call in from_recipe() now uses sub_vars to resolve JINJA variables * README changes * Fixes an issue with deriving archive names from URLs * Fixes more extraction naming issues * Introduces hash utility library to standardize hashing usage. Removes existing hash buffering work. * Adds unit tests for new hashing utility module * Remove unnecessary uses of pass. This gives a much more accurate test-coverage statistic for abstract classes * Starts work on artifact fetching unit tests * Adds advanced HTTP mockers * Adds unit test for fetch() that mocks the file system and HTTP requests * Adds missing pyfakefs requirement to the recipe file * Fixing build test by including conda-forge * Adds fetch failure unit tests * Refactors test_fetch() to use test params instead of fixture params for more dynamic expected value checking * Adds test_get_path_to_source_code() to test the happy-path of that function * Adds unit test for get_archive_sha256() * Adds unit test for get_archive_type() * Update conda_recipe_manager/fetcher/base_artifact_fetcher.py Co-authored-by: Bianca Henderson <[email protected]> --------- Co-authored-by: Bianca Henderson <[email protected]>
- Loading branch information
1 parent
1ccfc83
commit aa15b12
Showing
22 changed files
with
901 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
""" | ||
:Description: Provides a factory function for constructing Artifact Fetchers from a recipe file (work in progress). | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
from typing import Final, cast | ||
|
||
from conda_recipe_manager.fetcher.base_artifact_fetcher import BaseArtifactFetcher | ||
from conda_recipe_manager.parser.recipe_reader import RecipeReader | ||
from conda_recipe_manager.types import Primitives | ||
|
||
# Identifying string used to flag temp files and directories created by this module. | ||
_ARTIFACT_FETCHER_FILE_ID: Final[str] = "crm_artifact_fetcher" | ||
|
||
|
||
def from_recipe(recipe: RecipeReader) -> list[BaseArtifactFetcher]:
    """
    Builds the list of Artifact Fetchers described by the `/source` section of a recipe file.

    NOTE: This is a work in progress. The `/source` section is read and normalized, but no fetchers are constructed
    yet, so the returned list is always empty.

    :param recipe: Recipe to read the `/source` section from.
    :returns: List of Artifact Fetchers. Currently always empty.
    """
    # TODO add source-specific parser?
    raw_sources = cast(
        dict[str, Primitives] | list[dict[str, Primitives]], recipe.get_value("/source", sub_vars=True)
    )
    # The value is either a single dictionary or a list of dictionaries. Normalize to a list so that both shapes can
    # be iterated over the same way.
    source_list = raw_sources if isinstance(raw_sources, list) else [raw_sources]

    fetchers: list[BaseArtifactFetcher] = []
    for _ in source_list:
        # TODO Complete: construct a fetcher for each source entry.
        pass
    return fetchers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
""" | ||
:Description: Provides a base class that all Artifact Fetcher are derived from. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
from abc import ABCMeta, abstractmethod | ||
from pathlib import Path | ||
from tempfile import TemporaryDirectory | ||
from typing import Final | ||
|
||
# Identifying string used to flag temp files and directories created by this module. | ||
_ARTIFACT_FETCHER_FILE_ID: Final[str] = "crm_artifact_fetcher" | ||
|
||
|
||
class BaseArtifactFetcher(metaclass=ABCMeta):
    """
    Abstract parent of every `ArtifactFetcher` class. An `ArtifactFetcher` offers a common set of tools for retrieving
    bundles of source code.

    Anything an artifact fetcher retrieves is kept in a temporary directory owned by the instance (a
    `tempfile.TemporaryDirectory`). That directory is removed once the owning instance is no longer in use.
    """

    def __init__(self, name: str) -> None:
        """
        Constructs a BaseArtifactFetcher and creates the temporary directory it will store files in.

        :param name: Identifies the artifact. Ideally, this is the package name. In multi-sourced/mirrored scenarios,
            this might be the package name combined with some identifying information.
        """
        self._name = name
        # Tracks whether `fetch()` has completed successfully at least once.
        self._successfully_fetched = False

        # The directory name is bracketed by the module's identifying string and the artifact name, which makes
        # directories created by this module easy to spot.
        dir_prefix = f"{_ARTIFACT_FETCHER_FILE_ID}_"
        dir_suffix = f"_{self._name}"
        # NOTE: There is an open issue about this pylint edge case: https://github.com/pylint-dev/pylint/issues/7658
        self._temp_dir: Final[TemporaryDirectory[str]] = TemporaryDirectory(  # pylint: disable=consider-using-with
            prefix=dir_prefix, suffix=dir_suffix
        )
        self._temp_dir_path: Final[Path] = Path(self._temp_dir.name)

    @abstractmethod
    def fetch(self) -> None:
        """
        Retrieves the build artifact and source code and dumps it to a secure temporary location.

        "Gretchen, stop trying to make fetch happen! It's not going to happen!" - Regina George

        :raises FetchError: When the target artifact fails to be acquired.
        """

    @abstractmethod
    def get_path_to_source_code(self) -> Path:
        """
        Returns the directory containing the artifact's bundled source code.

        :raises FetchRequiredError: If a call to `fetch()` is required before using this function.
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
""" | ||
:Description: Provides exceptions for fetching modules. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
|
||
class FetcherException(Exception):
    """
    Root of the artifact fetching exception hierarchy. Every other fetching exception derives from this class, so it
    should not be raised directly.
    """
|
||
|
||
class FetchError(FetcherException):
    """
    General exception to be thrown when there is a failure to fetch an artifact.
    """

    def __init__(self, message: str = ""):
        """
        Constructs a FetchError Exception.

        :param message: String description of the issue encountered. When empty (or omitted), a generic default
            description is used instead.
        """
        # A truthiness check (rather than `len()`) also falls back to the default for `None`, instead of raising a
        # `TypeError` while an error is already being reported.
        self.message = message or "An unknown error occurred while trying to fetch an artifact."
        super().__init__(self.message)
|
||
|
||
class FetchRequiredError(FetcherException):
    """
    This operation could not be performed because a call to `fetch()` has not yet succeeded.
    """

    def __init__(self, message: str = ""):
        """
        Constructs a FetchRequiredError Exception.

        :param message: String description of the issue encountered. When empty (or omitted), a generic default
            description is used instead.
        """
        # A truthiness check (rather than `len()`) also falls back to the default for `None`, instead of raising a
        # `TypeError` while an error is already being reported.
        self.message = message or "An operation could not be completed as the artifact has not been fetched."
        super().__init__(self.message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
""" | ||
:Description: Provides an Artifact Fetcher capable of acquiring source code from a remote git repository. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
from pathlib import Path | ||
|
||
from conda_recipe_manager.fetcher.base_artifact_fetcher import BaseArtifactFetcher | ||
|
||
|
||
class GitArtifactFetcher(BaseArtifactFetcher):
    """
    Artifact Fetcher intended to clone a remote git repository.

    NOTE: This class is currently a template. Cloning has not been implemented yet.
    """

    def __init__(self, name: str, git_url: str):
        """
        Constructs a `GitArtifactFetcher` instance.

        TODO add other params

        :param name: Identifies the artifact. Forwarded to the base class constructor.
        :param git_url: URL of the remote git repository. Stored on the instance for later use.
        """
        super().__init__(name)
        self._git_url = git_url

    def _clone(self) -> None:
        """
        TODO: Clone the repository at the stored git URL. Currently a placeholder that does nothing.
        """

    def fetch(self) -> None:
        """
        Retrieves the repository by delegating to `_clone()`.

        NOTE: `_clone()` is still a placeholder, so nothing is retrieved and the "successfully fetched" flag is not
        updated by this function yet.
        """
        self._clone()

    def get_path_to_source_code(self) -> Path:
        """
        Returns the directory containing the artifact's bundled source code.

        NOTE: Placeholder implementation. An empty `Path()` is returned unconditionally; the `FetchRequiredError`
        described by the base class contract is not raised here yet.
        """
        return Path()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
""" | ||
:Description: Provides an Artifact Fetcher capable of acquiring a software archive from an HTTP/HTTPS source. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
import tarfile | ||
import zipfile | ||
from enum import Enum, auto | ||
from pathlib import Path | ||
from typing import Final, Iterator, cast | ||
from urllib.parse import urlparse | ||
|
||
import requests | ||
|
||
from conda_recipe_manager.fetcher.base_artifact_fetcher import BaseArtifactFetcher | ||
from conda_recipe_manager.fetcher.exceptions import FetchError, FetchRequiredError | ||
from conda_recipe_manager.utils.cryptography.hashing import hash_file | ||
|
||
# Default download timeout for artifacts | ||
_DOWNLOAD_TIMEOUT: Final[int] = 5 * 60 # 5 minutes | ||
|
||
|
||
class ArtifactArchiveType(Enum):
    """
    Lists the archive file formats an artifact may be identified as.
    """

    # Standard zip archive
    ZIP = auto()
    # 7zip archive
    # TODO determine how to do this in Python
    ZIP_7 = auto()
    # tar archive
    TARBALL = auto()
    # The artifact type could not be determined
    UNKNOWN = auto()
|
||
|
||
class HttpArtifactFetcher(BaseArtifactFetcher):
    """
    Artifact Fetcher capable of downloading a software archive from a remote HTTP/HTTPS source.
    """

    def __init__(self, name: str, archive_url: str):
        """
        Constructs an `HttpArtifactFetcher` instance.

        :param name: Identifies the artifact. Ideally, this is the package name. In multi-sourced/mirrored scenarios,
            this might be the package name combined with some identifying information.
        :param archive_url: URL that points to the target software archive.
        """
        super().__init__(name)
        self._archive_url = archive_url
        self._archive_type = ArtifactArchiveType.UNKNOWN

        # We use `urlparse` to extract the file path containing the archive. This can be used to get the archive's file
        # name. Many of the archive files we deal with contain the version number with period markings. We also work
        # with archives with many different file extensions. To avoid the many pitfalls here of trying to calculate the
        # "true basename" of the file, we just pre-pend `extracted_` to indicate this is the folder containing the
        # extracted archive.
        archive_file_name: Final[str] = Path(urlparse(self._archive_url).path).name
        extracted_dir_name: Final[str] = f"extracted_{archive_file_name}"

        self._archive_path: Final[Path] = self._temp_dir_path / archive_file_name
        self._uncompressed_archive_path: Final[Path] = self._temp_dir_path / extracted_dir_name

    def _fetch_guard(self, msg: str) -> None:
        """
        Convenience function that prevents executing functions that require the archive to be downloaded.

        :param msg: Message to attach to the exception.
        :raises FetchRequiredError: If `fetch()` has not been successfully invoked.
        """
        if not self._successfully_fetched:
            raise FetchRequiredError(msg)

    def _extract(self) -> None:
        """
        Identifies the type of the downloaded archive by inspecting the file, records that type, and extracts the
        archive's contents into the extraction directory inside the temporary directory.

        :raises FetchError: If the archive type could not be identified or an issue occurred while extracting the
            archive.
        """
        try:
            # The checks are ordered: the tarball check runs first and the zip check is only consulted when it fails.
            if tarfile.is_tarfile(self._archive_path):
                self._archive_type = ArtifactArchiveType.TARBALL
                with tarfile.open(self._archive_path, mode="r") as tar_file:
                    # The `filter="data"` parameter guards against "the most dangerous security issues"
                    tar_file.extractall(path=self._uncompressed_archive_path, filter="data")
            elif zipfile.is_zipfile(self._archive_path):
                self._archive_type = ArtifactArchiveType.ZIP
                with zipfile.ZipFile(self._archive_path) as zip_file:
                    # TODO improve security checks
                    zip_file.extractall(path=self._uncompressed_archive_path)
            # TODO 7-zip support
            else:
                raise FetchError("The archive type could not be identified.")
        except (tarfile.TarError, zipfile.BadZipFile, ValueError) as e:
            raise FetchError("An extraction error occurred while extracting the archive.") from e
        except IOError as e:
            raise FetchError("A file system error occurred while extracting the archive.") from e

    def fetch(self) -> None:
        """
        Retrieves a software archive from a remote HTTP/HTTPS host and stores the files in a secure temporary directory.

        :raises FetchError: If an issue occurred while downloading or extracting the archive.
        """
        # A new attempt invalidates any previous result. Without this, a failed re-fetch would leave the instance
        # reporting success while the archive on disk may be missing or only partially written.
        self._successfully_fetched = False

        # Buffered download approach
        try:
            # The context manager releases the streamed connection, even if the download is interrupted.
            with requests.get(str(self._archive_url), stream=True, timeout=_DOWNLOAD_TIMEOUT) as response:
                # Without this check, the body of an HTTP error page (404, 500, etc) would be written to disk as if it
                # were the archive. `HTTPError` derives from `RequestException`, so it is reported as a `FetchError`.
                response.raise_for_status()
                with open(self._archive_path, "wb") as archive:
                    for chunk in cast(Iterator[bytes], response.iter_content(chunk_size=1024)):
                        # Skip empty chunks instead of ending the download early; the iterator terminates on its own
                        # when the response body has been consumed.
                        if chunk:
                            archive.write(chunk)
        except requests.exceptions.RequestException as e:  # type: ignore[misc]
            raise FetchError("An HTTP error occurred while fetching the archive.") from e
        except IOError as e:
            raise FetchError("A file system error occurred while fetching the archive.") from e

        self._extract()

        # If we have not thrown at this point, we have successfully fetched the archive.
        self._successfully_fetched = True

    def get_path_to_source_code(self) -> Path:
        """
        Returns the directory containing the artifact's bundled source code.

        NOTE: If the target archive compresses a top-level folder that contains the source code, this path will point
        to a directory containing that uncompressed top-level folder.

        :raises FetchRequiredError: If `fetch()` has not been successfully invoked.
        :returns: Path to the directory the archive was extracted into.
        """
        self._fetch_guard("Archive has not been downloaded, so the source code is unavailable.")

        return self._uncompressed_archive_path

    def get_archive_sha256(self) -> str:
        """
        Calculates a SHA-256 hash on the downloaded software archive.

        :raises FetchRequiredError: If `fetch()` has not been successfully invoked.
        :returns: The value produced by `hash_file()` for the downloaded archive file.
        """
        self._fetch_guard("Archive has not been downloaded, so the file can't be hashed.")

        return hash_file(self._archive_path, "sha256")

    def get_archive_type(self) -> ArtifactArchiveType:
        """
        Returns the type of archive that was retrieved. The type is determined by inspecting the file itself, not by
        the file name.

        :raises FetchRequiredError: If `fetch()` has not been successfully invoked.
        :returns: The archive type recorded during extraction.
        """
        self._fetch_guard("Archive has not been downloaded, so the type can't be determined.")

        return self._archive_type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
Oops, something went wrong.