Skip to content

Commit

Permalink
Add a /gitdiff API (#482)
Browse files Browse the repository at this point in the history
* Add a server API to perform git diff

This change adds a server endpoint to diff two git refs for a single file. This is flexible to perform diffs across various points in the Git lifecycle such as diff'ing changes in the Working Tree, Index, or two arbitrary commits. There are two reserverd refs WORKING and INDEX to designate the Working Tree and the git Index

* Handle empty blobs correctly

If a file was added or deleted, GitPython will indicate the blob as None. In the current case, we would always read the file from disk so the file contents would be same as the Working Tree. This commit modifies that handling to handle the Working Tree case separately, and the None blob case separately.
  • Loading branch information
jaipreet-s authored and vidartf committed Aug 2, 2019
1 parent ed26ad9 commit f96ac65
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 37 deletions.
62 changes: 42 additions & 20 deletions nbdime/gitfiles.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-

import os
import io
import os
from collections import deque

from six import string_types

os.environ['GIT_PYTHON_REFRESH'] = 'quiet'
from git import (
Repo, InvalidGitRepositoryError, BadName, NoSuchPathError,
GitCommandNotFound,
GitCommandNotFound, Diffable
)

from nbdime.vcs.git.filter_integration import apply_possible_filter
from .utils import EXPLICIT_MISSING_FILE, pushd


# Git ref representing the working tree
GitRefWorkingTree = None

# Git ref representing the index
GitRefIndex = Diffable.Index


class BlobWrapper(io.StringIO):
"""StringIO with a name attribute"""
name = ''
Expand Down Expand Up @@ -97,42 +104,51 @@ def _get_diff_entry_stream(path, blob, ref_name, repo_dir):
"""Get a stream to the notebook, for a given diff entry's path and blob
Returns None if path is not a Notebook file, and EXPLICIT_MISSING_FILE
if path is missing."""
if path is missing, or the blob is None (unless diffing against working
tree).
"""
if path:
if not path.endswith('.ipynb'):
return None
if blob is None:
if ref_name is GitRefWorkingTree:
# Diffing against working copy, use file on disk!
with pushd(repo_dir):
# Ensure we filter if appropriate:
if ref_name is None:
# We are diffing against working dir, so remote is candidate
ret = apply_possible_filter(path)
# ret == path means no filter was applied
if ret != path:
return ret
# We are diffing against working dir, so ensure we apply
# any git filters before comparing:
ret = apply_possible_filter(path)
# ret == path means no filter was applied
if ret != path:
return ret
try:
return io.open(path, encoding='utf-8')
except IOError:
return EXPLICIT_MISSING_FILE
elif blob is None:
# GitPython uses a None blob to indicate if a file was deleted or
# added. Workaround for GitPython issue #749.
return EXPLICIT_MISSING_FILE
else:
# There were strange issues with passing blob data_streams around,
# so we solve this by reading into a StringIO buffer.
# The penalty should be low as long as changed_notebooks are used
# properly as an iterator.
f = BlobWrapper(blob.data_stream.read().decode('utf-8'))
f.name = '%s (%s)' % (path, ref_name)
f.name = '%s (%s)' % (
path,
ref_name if ref_name != GitRefIndex else '<INDEX>'
)
return f
return EXPLICIT_MISSING_FILE


def changed_notebooks(ref_base, ref_remote, paths=None, repo_dir=None):
"""Iterator over all notebooks in path that has changed between the two git refs
References are all valid values according to git-rev-parse. If ref_remote
is None, the difference is taken between ref_base and the working directory.
References are all valid values according to git-rev-parse, or one of
the special sentinel values GitRefWorkingTree or GitRefIndex.
Iterator value is a base/remote pair of streams to Notebooks
(or possibly EXPLICIT_MISSING_FILE for added/removed files).
or EXPLICIT_MISSING_FILE for added/removed files.
"""
repo, popped = get_repo(repo_dir or os.curdir)
if repo_dir is None:
Expand All @@ -142,15 +158,21 @@ def changed_notebooks(ref_base, ref_remote, paths=None, repo_dir=None):
if paths and popped:
# All paths need to be prepended by popped
paths = [os.path.join(*(popped + (p,))) for p in paths]
# Get tree for base:
tree_base = repo.commit(ref_base).tree
if ref_remote is None:
# Diff tree against working copy:
diff = tree_base.diff(None, paths)

# Get tree/index for base
if ref_base == GitRefIndex:
tree_base = repo.index
else:
tree_base = repo.commit(ref_base).tree

if ref_remote in (GitRefWorkingTree, GitRefIndex):
diff = tree_base.diff(ref_remote, paths)
else:
# Get remote tree and diff against base:
tree_remote = repo.commit(ref_remote).tree
diff = tree_base.diff(tree_remote, paths)

# Return the base/remote pair of Notebook file streams
for entry in diff:
fa = _get_diff_entry_stream(
entry.a_path, entry.a_blob, ref_base, repo_dir)
Expand Down
39 changes: 39 additions & 0 deletions nbdime/tests/test_server_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,45 @@ def test_diff_api(git_repo2, server_extension_app):
assert data['diff']
assert len(data.keys()) == 2

@pytest.mark.timeout(timeout=WEB_TEST_TIMEOUT*6)
def test_git_diff_api(git_repo2, server_extension_app, filespath):
local_path = os.path.relpath(git_repo2, server_extension_app['path'])
url = 'http://127.0.0.1:%i/nbdime/api/gitdiff' % server_extension_app['port']

# Add a difference betweeen index and working tree:
shutil.copy(
pjoin(filespath, 'foo--1.ipynb'),
pjoin(git_repo2, 'sub', 'subfile.ipynb')
)

def _make_ref(key):
if key.lower() in ('working', 'index'):
return {'special': key}
return {'git': key}

# Test various diffs:
for args in (
('HEAD', 'WORKING', 'diff.ipynb'),
('HEAD', 'INDEX', 'diff.ipynb'),
('INDEX', 'HEAD', 'diff.ipynb'),
('INDEX', 'WORKING', 'sub/subfile.ipynb'),
('index', 'working', 'sub/subfile.ipynb'),
('iNdeX', 'WorKING', 'sub/subfile.ipynb'),
):
print(args)
r = requests.post(
url, headers=auth_header,
data=json.dumps({
'ref_local': _make_ref(args[0]),
'ref_remote': _make_ref(args[1]),
'file_path': pjoin(local_path, args[2])
}))
r.raise_for_status()
data = r.json()
nbformat.validate(data['base'])
assert data['diff']
assert len(data.keys()) == 2


@pytest.mark.timeout(timeout=WEB_TEST_TIMEOUT)
def test_diff_api_checkpoint(tmpdir, filespath, server_extension_app):
Expand Down
136 changes: 119 additions & 17 deletions nbdime/webapp/nb_server_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..gitfiles import (
changed_notebooks, is_path_in_repo, find_repo_root,
InvalidGitRepositoryError, BadName, GitCommandNotFound,
GitRefWorkingTree, GitRefIndex
)
from ..ignorables import diff_ignorables
from ..utils import read_notebook
Expand All @@ -33,6 +34,12 @@
)


special_refs = {
'working': GitRefWorkingTree,
'index': GitRefIndex,
}


class AuthMainDifftoolHandler(MainDifftoolHandler):
@authenticated
def get(self):
Expand Down Expand Up @@ -69,38 +76,45 @@ def get(self):
))


class ExtensionApiDiffHandler(ApiDiffHandler):
"""Diff API handler that also handles diff to git HEAD"""
class BaseGitDiffHandler(ApiDiffHandler):

def _get_git_notebooks(self, base_arg):
def get_git_notebooks(self, file_path_arg, ref_base='HEAD', ref_remote=None):
"""
Gets the content of the before and after state of the notebook based on the given Git refs.
:param file_path_arg: The path to the file being diffed
:param ref_base: the Git ref for the "local" or the "previous" state
:param ref_remote: the Git ref for the "remote" or the "current" state
:return: (base_nb, remote_nb)
"""
# Sometimes the root dir of the files is not cwd
nb_root = getattr(self.contents_manager, 'root_dir', None)
# Resolve base argument to a file system path
base = os.path.realpath(to_os_path(base_arg, nb_root))
file_path = os.path.realpath(to_os_path(file_path_arg, nb_root))

# Ensure path/root_dir that can be sent to git:
try:
git_root = find_repo_root(base)
git_root = find_repo_root(file_path)
except InvalidGitRepositoryError as e:
self.log.exception(e)
raise HTTPError(422, 'Invalid notebook: %s' % base)
base = os.path.relpath(base, git_root)
raise HTTPError(422, 'Invalid notebook: %s' % file_path)
file_path = os.path.relpath(file_path, git_root)

# Get the base/remote notebooks:
try:
for fbase, fremote in changed_notebooks('HEAD', None, base, git_root):
for fbase, fremote in changed_notebooks(ref_base, ref_remote, file_path, git_root):
base_nb = read_notebook(fbase, on_null='minimal')
remote_nb = read_notebook(fremote, on_null='minimal')
break # there should only ever be one set of files
else:
# The filename was either invalid or the file is unchanged
# Assume unchanged, and let read_notebook handle error
# reporting if invalid
base_nb = self.read_notebook(os.path.join(git_root, base))
base_nb = self.read_notebook(os.path.join(git_root, file_path))
remote_nb = base_nb
except (InvalidGitRepositoryError, BadName) as e:
self.log.exception(e)
raise HTTPError(422, 'Invalid notebook: %s' % base_arg)
raise HTTPError(422, 'Invalid notebook: %s' % file_path_arg)
except GitCommandNotFound as e:
self.log.exception(e)
raise HTTPError(
Expand All @@ -109,6 +123,17 @@ def _get_git_notebooks(self, base_arg):

return base_nb, remote_nb

@property
def curdir(self):
root_dir = getattr(self.contents_manager, 'root_dir', None)
if root_dir is None:
return super(ExtensionApiDiffHandler, self).curdir
return root_dir


class ExtensionApiDiffHandler(BaseGitDiffHandler):
"""Diff API handler that also handles diff to git HEAD"""

@gen.coroutine
def _get_checkpoint_notebooks(self, base):
# Get the model for the current notebook:
Expand Down Expand Up @@ -138,11 +163,12 @@ def _get_checkpoint_notebooks(self, base):
@authenticated
@gen.coroutine
def post(self):
# TODO: Add deprecation warning (for git/checkpoint only?)
# Assuming a request on the form "{'argname':arg}"
body = json.loads(escape.to_unicode(self.request.body))
base = body['base']
if base.startswith('git:'):
base_nb, remote_nb = self._get_git_notebooks(base[len('git:'):])
base_nb, remote_nb = self.get_git_notebooks(base[len('git:'):])
elif base.startswith('checkpoint:'):
base_nb, remote_nb = yield self._get_checkpoint_notebooks(base[len('checkpoint:'):])
else:
Expand All @@ -163,12 +189,87 @@ def post(self):
}
self.finish(data)

@property
def curdir(self):
root_dir = getattr(self.contents_manager, 'root_dir', None)
if root_dir is None:
return super(ExtensionApiDiffHandler, self).curdir
return root_dir

class GitDiffHandler(BaseGitDiffHandler):
"""Diff API handler that handles diffs for two git refs"""

@classmethod
def parse_ref(cls, data):
return data.get('git', None) or special_refs[data['special'].lower()]

def _validate_request(self, body):
def _fail(msg):
self.log.exception(msg)
raise HTTPError(400, msg)

# Validate refs
for refname in ('ref_local', 'ref_remote'):

# Validate ref_curr
try:
ref = body[refname]
except KeyError:
_fail('Required key %s not provided in the request' % (refname))

# Either of special or git is supported in ref
if 'special' in ref and 'git' in ref:
_fail('Only one of special and git should be present in git '
'reference.')

if not ('special' in ref or 'git' in ref):
_fail('At least one of special and git should be present in git '
'reference.')

if 'special' in ref:
special = ref['special'].lower()
if refname == 'ref_local':
if special != 'index':
_fail('Only "index" is allowed for the "special" value '
'on ref_local, got %r.' % (special,))
elif special not in ('index', 'working'):
_fail('Only "index" or "working" is allowed for the "special" value '
'on ref_remote, got %r.' % (special,))


# Validate file_name
try:
body['file_path']
except KeyError:
_fail('Required value file_path not provided in the request')


@authenticated
@gen.coroutine
def post(self):
body = json.loads(escape.to_unicode(self.request.body))

try:
# Validate the request input
self._validate_request(body)

# Get file contents based on Git regs
ref_local = body['ref_local']
ref_remote = body['ref_remote']
file_path = body['file_path']
base_nb, remote_nb = self.get_git_notebooks(
file_path,
GitDiffHandler.parse_ref(ref_local),
GitDiffHandler.parse_ref(ref_remote),
)

# Perform actual diff and return data
thediff = diff_notebooks(base_nb, remote_nb)

data = {
'base': base_nb,
'diff': thediff,
}
self.finish(data)
except HTTPError:
raise
except Exception:
self.log.exception('Error diffing documents:')
raise HTTPError(500, 'Error while attempting to diff documents')


class IsGitHandler(NbdimeHandler, APIHandler):
Expand Down Expand Up @@ -226,6 +327,7 @@ def _load_jupyter_server_extension(nb_server_app):
(r'/nbdime/git-difftool', GitDifftoolHandler, params),
(r'/nbdime/api/diff', ExtensionApiDiffHandler, params),
(r'/nbdime/api/isgit', IsGitHandler, params),
(r'/nbdime/api/gitdiff', GitDiffHandler, params)
]

# Prefix routes with base_url:
Expand Down

0 comments on commit f96ac65

Please sign in to comment.