Skip to content

Commit

Permalink
[FIX] cache dir as context and more intelligent and robust
Browse files Browse the repository at this point in the history
  • Loading branch information
[email protected] committed Nov 7, 2024
1 parent 8ec74cb commit c9f5181
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 117 deletions.
96 changes: 62 additions & 34 deletions gimera/cachedir.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import uuid
import click
import json
import shutil
Expand All @@ -11,12 +12,16 @@
from .tools import remember_cwd
from .tools import reformat_url
from .tools import _raise_error
from .tools import rmtree
from .tools import replace_dir_with

# Store big repos in a tar file and try to restore from there;
# otherwise a lot of downloads have to be done.


def _get_cache_dir(main_repo, repo_yml):
from contextlib import contextmanager
@contextmanager
def _get_cache_dir(main_repo, repo_yml, no_action_if_not_exist=False):
url = repo_yml.url
if not url:
click.secho(f"Missing url: {json.dumps(repo_yml, indent=4)}")
Expand All @@ -28,42 +33,64 @@ def _get_cache_dir(main_repo, repo_yml):
urlsafe = url

for c in "?:+[]{}\\/\"'":
urlsafe = urlsafe.replace(c, "_")
urlsafe = urlsafe.replace(c, "-")
urlsafe = urlsafe.split("@")[-1]

path = Path(os.path.expanduser("~/.cache/gimera")) / urlsafe
path.parent.mkdir(exist_ok=True, parents=True)

must_exist = ["HEAD", "refs", "objects", "config", "info"]
if path.exists() and any(not (path / x).exists() for x in must_exist):
shutil.rmtree(path)

if not path.exists():
click.secho(
f"Caching the repository {repo_yml.url} for quicker reuse",
fg="yellow",
)
tarfile = _get_cache_dir_tarfile(path)
with prepare_dir(path) as _path:
with remember_cwd(
"/tmp"
): # called from other situations where path may not exist anymore
if tarfile.exists():
_extract_tar_file(_path, tarfile)
else:
Repo(main_repo.path).X(*(git + ["clone", "--bare", url, _path]))
_make_tar_file(_path, tarfile)

if repo_yml.sha:
repo = Repo(path)
if not repo.contain_commit(repo_yml.sha):
# make a fetch quickly; sha is missing
repo.X(*(git + ["fetch", "--all"]))
golden_path = path
if no_action_if_not_exist and not golden_path.exists():
yield None
return
possible_temp_path = Path(str(path) + "." + str(uuid.uuid4()))
del path
try:
golden_path.parent.mkdir(exist_ok=True, parents=True)

must_exist = ["HEAD", "refs", "objects", "config", "info"]
if golden_path.exists() and any(not (golden_path / x).exists() for x in must_exist):
rmtree(golden_path)

just_cloned = False
if not golden_path.exists():
click.secho(
f"Caching the repository {repo_yml.url} for quicker reuse",
fg="yellow",
)
tarfile = _get_cache_dir_tarfile(golden_path)
with prepare_dir(possible_temp_path) as _path:
with remember_cwd(
"/tmp"
): # called from other situations where path may not exist anymore
if tarfile.exists():
_extract_tar_file(_path, tarfile)
just_cloned = True
else:
Repo(main_repo.path).X(*(git + ["clone", "--bare", url, _path]))
_make_tar_file(_path, tarfile)
just_cloned = True

effective_path = possible_temp_path if just_cloned else golden_path

if repo_yml.sha:
repo = Repo(effective_path)
if not repo.contain_commit(repo_yml.sha):
_raise_error((
f"After fetching the commit {repo_yml.sha} "
f"was not found for {repo_yml.path}"
))
return path
# make a fetch quickly; sha is missing
repo.X(*(git + ["fetch", "--all"]))
if not repo.contain_commit(repo_yml.sha):
_raise_error((
f"After fetching the commit {repo_yml.sha} "
f"was not found for {repo_yml.path}"
))

yield effective_path

if just_cloned:
replace_dir_with(possible_temp_path, golden_path)

finally:
possible_temp_path = Path(possible_temp_path)
if possible_temp_path.exists():
rmtree(possible_temp_path)

def _get_cache_dir_tarfile(_path):
return Path(str(_path) + ".tar.gz")
Expand All @@ -74,4 +101,5 @@ def _make_tar_file(_path, tarfile):
subprocess.check_call(["tar", "cfz", str(tarfile), "-C", str(_path), '.'])

def _extract_tar_file(_path, tarfile):
    """Unpack the gzipped tarball ``tarfile`` into the directory ``_path``.

    Prints a notice to the console, then runs ``tar`` with ``_path`` as its
    working directory. ``subprocess.check_call`` raises
    ``CalledProcessError`` when ``tar`` exits with a non-zero status.
    """
    notice = f"Extracting tar file {tarfile} to {_path}"
    click.secho(notice, fg='yellow')
    command = ["tar", "xfz", str(tarfile)]
    subprocess.check_call(command, cwd=_path)
35 changes: 18 additions & 17 deletions gimera/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,24 @@ def _pull_repo(index, main_repo, repo_yml):
return
verbose(f"Fetching {repo_yml.url}")
results["urls"].add(repo_yml.url)
cache_dir = _get_cache_dir(main_repo, repo_yml)
repo = Repo(cache_dir)
do_fetch = True
if minimal_fetch:
with wait_git_lock(cache_dir):
if repo_yml.sha:
if repo.contains(repo_yml.sha):
do_fetch = False
else:
if repo.contains_branch(repo_yml.branch):
do_fetch = False

if do_fetch:
with wait_git_lock(cache_dir):
_fetch_branch(
repo, repo_yml, filter_remote="origin", no_fetch=False
)
with _get_cache_dir(main_repo, repo_yml, no_action_if_not_exist=True) as cache_dir:
if cache_dir is not None:
repo = Repo(cache_dir)
do_fetch = True
if minimal_fetch:
with wait_git_lock(cache_dir):
if repo_yml.sha:
if repo.contains(repo_yml.sha):
do_fetch = False
else:
if repo.contains_branch(repo_yml.branch):
do_fetch = False

if do_fetch:
with wait_git_lock(cache_dir):
_fetch_branch(
repo, repo_yml, filter_remote="origin", no_fetch=False
)

except Exception as ex:
if os.getenv("GIMERA_IGNORE_FETCH_ERRORS") == "1":
Expand Down
92 changes: 46 additions & 46 deletions gimera/integrated.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,58 +30,58 @@ def _update_integrated_module(
Put contents of a git repository inside the main repository.
"""
# use a cache directory for pulling the repository and updating it
cache_dir = _get_cache_dir(main_repo, repo_yml)
if not os.access(cache_dir, os.W_OK):
_raise_error(f"No R/W rights on {cache_dir}")
repo = Repo(cache_dir)
verbose(f"Updating integrated module {repo_yml.path}")
with _get_cache_dir(main_repo, repo_yml) as cache_dir:
if not os.access(cache_dir, os.W_OK):
_raise_error(f"No R/W rights on {cache_dir}")
repo = Repo(cache_dir)
verbose(f"Updating integrated module {repo_yml.path}")

parent_repo = main_repo
dest_path = Path(working_dir) / repo_yml.path
parent_repo = Repo(get_nearest_repo(main_repo.path, dest_path))
parent_repo = main_repo
dest_path = Path(working_dir) / repo_yml.path
parent_repo = Repo(get_nearest_repo(main_repo.path, dest_path))

# BTW: delete-after cannot remove unused directories - cool to know; is
# just standarded out
if dest_path.exists():
rmtree(dest_path)
# BTW: delete-after cannot remove unused directories - cool to know; is
# just standarded out
if dest_path.exists():
rmtree(dest_path)

with wait_git_lock(cache_dir):
commit = repo_yml.sha or repo_yml.branch if not update else repo_yml.branch
with repo.worktree(commit) as worktree:
new_sha = worktree.hex
msgs = [f"Updating submodule {repo_yml.path}"] + _apply_merges(
worktree, repo_yml
)
worktree.move_worktree_content(dest_path)
# TODO perhaps not necessary as of line 63 -- seems to be necessary
# case: submodule is in .gitignore; updates the submodule
# then git add <path> needs to add the deleted files
# Could also be that a subgimera sha was updated
parent_repo.commit_dir_if_dirty(dest_path, "\n".join(msgs), force=True)
del repo
with wait_git_lock(cache_dir):
commit = repo_yml.sha or repo_yml.branch if not update else repo_yml.branch
with repo.worktree(commit) as worktree:
new_sha = worktree.hex
msgs = [f"Updating submodule {repo_yml.path}"] + _apply_merges(
worktree, repo_yml
)
worktree.move_worktree_content(dest_path)
# TODO perhaps not necessary as of line 63 -- seems to be necessary
# case: submodule is in .gitignore; updates the submodule
# then git add <path> needs to add the deleted files
# Could also be that a subgimera sha was updated
parent_repo.commit_dir_if_dirty(dest_path, "\n".join(msgs), force=True)
del repo

# apply patches:
if os.getenv("GIMERA_DO_NOT_APPLY_PATCHES") != "1":
_apply_patches(repo_yml)
msg = f"updated {REPO_TYPE_INT} submodule: {repo_yml.path}"
repo_yml.sha = new_sha
if repo_yml.config.config_file in parent_repo.all_dirty_files_absolute:
# could be, that the parent path of the gimera.yml belongs to gitignore
# so force add
parent_repo.X(*(git + ["add", '-f', repo_yml.config.config_file]))
parent_repo.commit_dir_if_dirty(dest_path, msg)
if any(
str(x).startswith(str(dest_path)) for x in parent_repo.all_dirty_files_absolute
):
parent_repo.X(*(git + ["add", dest_path]))
# apply patches:
if os.getenv("GIMERA_DO_NOT_APPLY_PATCHES") != "1":
_apply_patches(repo_yml)
msg = f"updated {REPO_TYPE_INT} submodule: {repo_yml.path}"
repo_yml.sha = new_sha
if repo_yml.config.config_file in parent_repo.all_dirty_files_absolute:
# could be, that the parent path of the gimera.yml belongs to gitignore
# so force add
parent_repo.X(*(git + ["add", '-f', repo_yml.config.config_file]))
parent_repo.commit_dir_if_dirty(dest_path, msg)
if any(
str(x).startswith(str(dest_path)) for x in parent_repo.all_dirty_files_absolute
):
parent_repo.X(*(git + ["add", dest_path]))

if parent_repo.staged_files:
parent_repo.X(*(git + ["commit", "-m", msg]))
if parent_repo.staged_files:
parent_repo.X(*(git + ["commit", "-m", msg]))

if repo_yml.edit_patchfile:
_apply_patchfile(
repo_yml.edit_patchfile_full_path, repo_yml.fullpath, error_ok=True
)
if repo_yml.edit_patchfile:
_apply_patchfile(
repo_yml.edit_patchfile_full_path, repo_yml.fullpath, error_ok=True
)


def _apply_merges(repo, repo_yml):
Expand Down
14 changes: 7 additions & 7 deletions gimera/patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,13 +404,13 @@ def _clone_directory_and_add_patch_file(
del repo
# also make sure that local cache is updated, because
# latest repo version is applied to project
cache_dir = _get_cache_dir(main_repo, repo_yml)
with wait_git_lock(cache_dir):
repo = Repo(cache_dir)
_fetch_branch(repo, repo_yml, filter_remote="origin")
with repo.worktree(branch) as repo:
repo.pull(repo_yml=repo_yml)
return repo.hex
with _get_cache_dir(main_repo, repo_yml) as cache_dir:
with wait_git_lock(cache_dir):
repo = Repo(cache_dir)
_fetch_branch(repo, repo_yml, filter_remote="origin")
with repo.worktree(branch) as repo:
repo.pull(repo_yml=repo_yml)
return repo.hex


def _technically_make_patch(repo, path):
Expand Down
26 changes: 13 additions & 13 deletions gimera/submodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,12 @@ def _commit_submodule():

@contextmanager
def _temporary_switch_remote_to_cachedir(main_repo, repo_yml, relpath):
cache_dir = _get_cache_dir(main_repo, repo_yml)
main_repo.X(*(git + ["submodule", "set-url", relpath, f"file://{cache_dir}"]))
try:
yield
finally:
main_repo.X(*(git + ["submodule", "set-url", relpath, repo_yml.url]))
with _get_cache_dir(main_repo, repo_yml) as cache_dir:
main_repo.X(*(git + ["submodule", "set-url", relpath, f"file://{cache_dir}"]))
try:
yield
finally:
main_repo.X(*(git + ["submodule", "set-url", relpath, repo_yml.url]))


def _make_sure_subrepo_is_checked_out(working_dir, main_repo, repo_yml, common_vars):
Expand Down Expand Up @@ -234,13 +234,13 @@ def __add_submodule(root_dir, working_dir, repo, config, all_config, common_vars
repo.X(*(git + ["rm", "-rf", relpath]))
rmtree(repo.path / relpath)

cache_dir = _get_cache_dir(repo, config)
repo.submodule_add(config.branch, str(cache_dir), relpath)
repo.X(*(git + ["submodule", "set-url", relpath, config.url]))
repo.X(*(git + ["add", ".gitmodules"]))
click.secho(f"Added submodule {relpath} pointing to {config.url}", fg="yellow")
if repo.staged_files:
repo.X(*(git + ["commit", "-m", f"gimera added submodule: {relpath}"]))
with _get_cache_dir(repo, config) as cache_dir:
repo.submodule_add(config.branch, str(cache_dir), relpath)
repo.X(*(git + ["submodule", "set-url", relpath, config.url]))
repo.X(*(git + ["add", ".gitmodules"]))
click.secho(f"Added submodule {relpath} pointing to {config.url}", fg="yellow")
if repo.staged_files:
repo.X(*(git + ["commit", "-m", f"gimera added submodule: {relpath}"]))

# check for success
state = get_effective_state(
Expand Down
11 changes: 11 additions & 0 deletions gimera/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,3 +437,14 @@ def files_relative_to(files, folder):
res = safe_relative_to(file, folder)
if res:
yield res

def replace_dir_with(source_dir, dest_dir):
    """Move ``source_dir`` to ``dest_dir``, replacing whatever is there.

    An existing ``dest_dir`` is first moved aside to a uniquely named
    sibling (``<dest_dir>.<uuid4>``). Only after ``source_dir`` has been
    moved into place is that backup deleted. If moving ``source_dir``
    fails, the backup is moved back so the previous content of
    ``dest_dir`` is not lost, and the original exception is re-raised.

    Args:
        source_dir: directory to move (``str`` or ``Path``).
        dest_dir: final location (``str`` or ``Path``); may or may not exist.

    Raises:
        Whatever ``shutil.move`` raises, e.g. ``FileNotFoundError`` when
        ``source_dir`` does not exist.
    """
    # NOTE(review): the module's import block is not visible here; uuid is
    # imported locally so this helper does not depend on it.
    import uuid

    source_dir = Path(source_dir)
    dest_dir = Path(dest_dir)
    backup = Path(str(dest_dir) + "." + str(uuid.uuid4()))

    had_previous = dest_dir.exists()
    if had_previous:
        shutil.move(str(dest_dir), str(backup))

    try:
        shutil.move(str(source_dir), str(dest_dir))
    except BaseException:
        # Roll back: drop a possibly half-written destination and put the
        # previous directory back before propagating the error.
        if had_previous and backup.exists():
            if dest_dir.exists():
                rmtree(dest_dir)
            shutil.move(str(backup), str(dest_dir))
        raise

    # The replacement succeeded; the old content is no longer needed.
    if backup.exists():
        rmtree(backup)

0 comments on commit c9f5181

Please sign in to comment.