Skip to content

Commit

Permalink
Enforce consistent tensor shape for scalars (#413)
Browse files Browse the repository at this point in the history
* Ensure non-empty tensor shape for consistency
   - Do this in "ask forgiveness, not permission" style: attempt to access the first entry of the tensor's shape and, if that raises an IndexError, reshape or expand the tensor so that it has shape (1,)
* Remove generic_len function from backends as a consistent tensor shape makes it unnecessary
* Add error logging and raise KeyError if an unsupported dtype is passed to the tensorlib's astensor method
* Add tests for tensor shape for scalars
* Add tests for bad tensor dtypes
  • Loading branch information
matthewfeickert authored Mar 5, 2019
1 parent 4fa45dd commit 1f6e3b0
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 45 deletions.
12 changes: 11 additions & 1 deletion pyhf/tensor/mxnet_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,19 @@ def astensor(self, tensor_in, dtype='float'):
MXNet NDArray: A multi-dimensional, fixed-size homogenous array.
"""
dtypemap = {'float': 'float32', 'int': 'int32', 'bool': 'uint8'}
dtype = dtypemap[dtype]
try:
dtype = dtypemap[dtype]
except KeyError:
log.error('Invalid dtype: dtype must be float, int, or bool.')
raise

try:
tensor = nd.array(tensor_in, dtype=dtype)
# Ensure non-empty tensor shape for consistency
try:
tensor.shape[0]
except IndexError:
tensor = tensor.broadcast_to((1,))
except ValueError:
tensor = nd.array([tensor_in], dtype=dtype)
return tensor
Expand Down
15 changes: 13 additions & 2 deletions pyhf/tensor/numpy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,19 @@ def astensor(self, tensor_in, dtype='float'):
`numpy.ndarray`: A multi-dimensional, fixed-size homogenous array.
"""
dtypemap = {'float': np.float64, 'int': np.int64, 'bool': np.bool_}
dtype = dtypemap[dtype]
return np.asarray(tensor_in, dtype=dtype)
try:
dtype = dtypemap[dtype]
except KeyError:
log.error('Invalid dtype: dtype must be float, int, or bool.')
raise

tensor = np.asarray(tensor_in, dtype=dtype)
# Ensure non-empty tensor shape for consistency
try:
tensor.shape[0]
except IndexError:
tensor = tensor.reshape(1)
return tensor

def sum(self, tensor_in, axis=None):
return np.sum(tensor_in, axis=axis)
Expand Down
36 changes: 18 additions & 18 deletions pyhf/tensor/pytorch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,19 @@ def astensor(self, tensor_in, dtype='float'):
torch.Tensor: A multi-dimensional matrix containing elements of a single data type.
"""
dtypemap = {'float': torch.float, 'int': torch.int, 'bool': torch.uint8}
dtype = dtypemap[dtype]
return torch.as_tensor(tensor_in, dtype=dtype)
try:
dtype = dtypemap[dtype]
except KeyError:
log.error('Invalid dtype: dtype must be float, int, or bool.')
raise

tensor = torch.as_tensor(tensor_in, dtype=dtype)
# Ensure non-empty tensor shape for consistency
try:
tensor.shape[0]
except IndexError:
tensor = tensor.expand(1)
return tensor

def gather(self, tensor, indices):
return torch.take(tensor, indices.type(torch.LongTensor))
Expand Down Expand Up @@ -165,28 +176,17 @@ def simple_broadcast(self, *args):
list of Tensors: The sequence broadcast together.
"""

def generic_len(a):
try:
return len(a)
except TypeError:
if len(a.shape) < 1:
return 0
else:
return a.shape[0]

args = [self.astensor(arg) for arg in args]
max_dim = max(map(generic_len, args))
max_dim = max(map(len, args))
try:
assert len([arg for arg in args if 1 < generic_len(arg) < max_dim]) == 0
assert len([arg for arg in args if 1 < len(arg) < max_dim]) == 0
except AssertionError as error:
log.error(
'ERROR: The arguments must be of compatible size: 1 or %i', max_dim
)
raise error

broadcast = [
arg if generic_len(arg) > 1 else arg.expand(max_dim) for arg in args
]
broadcast = [arg if len(arg) > 1 else arg.expand(max_dim) for arg in args]
return broadcast

def einsum(self, subscripts, *operands):
Expand Down Expand Up @@ -222,7 +222,7 @@ def poisson(self, n, lam):
>>> pyhf.tensorlib.poisson([5.], [6.])
tensor([0.1606])
>>> pyhf.tensorlib.poisson(5., 6.)
tensor(0.1606)
tensor([0.1606])
Args:
n (`tensor` or `float`): The value at which to evaluate the approximation to the Poisson distribution p.m.f.
Expand Down Expand Up @@ -257,7 +257,7 @@ def normal(self, x, mu, sigma):
>>> pyhf.tensorlib.normal([0.5], [0.], [1.])
tensor([0.3521])
>>> pyhf.tensorlib.normal(0.5, 0., 1.)
tensor(0.3521)
tensor([0.3521])
Args:
x (`tensor` or `float`): The value at which to evaluate the Normal distribution p.d.f.
Expand Down
47 changes: 23 additions & 24 deletions pyhf/tensor/tensorflow_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,26 @@ def astensor(self, tensor_in, dtype='float'):
`tf.Tensor`: A symbolic handle to one of the outputs of a `tf.Operation`.
"""
dtypemap = {'float': tf.float32, 'int': tf.int32, 'bool': tf.bool}
dtype = dtypemap[dtype]

if isinstance(tensor_in, tf.Tensor):
v = tensor_in
else:
if isinstance(tensor_in, (int, float)):
tensor_in = [tensor_in]
v = tf.convert_to_tensor(tensor_in)
if v.dtype is not dtype:
v = tf.cast(v, dtype)
return v
try:
dtype = dtypemap[dtype]
except KeyError:
log.error('Invalid dtype: dtype must be float, int, or bool.')
raise

tensor = tensor_in
# If already a tensor then done
try:
tensor.op
except AttributeError:
tensor = tf.convert_to_tensor(tensor_in)
# Ensure non-empty tensor shape for consistency
try:
tensor.shape[0]
except IndexError:
tensor = tf.reshape(tensor, [1])
if tensor.dtype is not dtype:
tensor = tf.cast(tensor, dtype)
return tensor

def sum(self, tensor_in, axis=None):
tensor_in = self.astensor(tensor_in)
Expand Down Expand Up @@ -215,20 +224,10 @@ def simple_broadcast(self, *args):
Returns:
list of Tensors: The sequence broadcast together.
"""

def generic_len(a):
try:
return len(a)
except TypeError:
if len(a.shape) < 1:
return 0
else:
return a.shape[0]

args = [self.astensor(arg) for arg in args]
max_dim = max(map(generic_len, args))
max_dim = max(map(lambda arg: arg.shape[0], args))
try:
assert len([arg for arg in args if 1 < generic_len(arg) < max_dim]) == 0
assert len([arg for arg in args if 1 < arg.shape[0] < max_dim]) == 0
except AssertionError as error:
log.error(
'ERROR: The arguments must be of compatible size: 1 or %i', max_dim
Expand All @@ -237,7 +236,7 @@ def generic_len(a):

broadcast = [
arg
if generic_len(arg) > 1
if arg.shape[0] > 1
else tf.tile(tf.slice(arg, [0], [1]), tf.stack([max_dim]))
for arg in args
]
Expand Down
12 changes: 12 additions & 0 deletions tests/test_tensor.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
import pytest
import logging
import numpy as np
import pyhf
from pyhf.simplemodels import hepdata_like


def test_astensor_dtype(backend, caplog):
tb = pyhf.tensorlib
with caplog.at_level(logging.INFO, 'pyhf.tensor'):
with pytest.raises(KeyError):
assert tb.astensor([1, 2, 3], dtype='long')
assert 'Invalid dtype' in caplog.text


def test_simple_tensor_ops(backend):
tb = pyhf.tensorlib
assert tb.tolist(tb.sum([[1, 2, 3], [4, 5, 6]], axis=0)) == [5, 7, 9]
Expand Down Expand Up @@ -84,6 +93,9 @@ def test_reshape(backend):
def test_shape(backend):
tb = pyhf.tensorlib
assert tb.shape(tb.ones((1, 2, 3, 4, 5))) == (1, 2, 3, 4, 5)
assert tb.shape(tb.astensor([])) == (0,)
assert tb.shape(tb.astensor([1.0])) == (1,)
assert tb.shape(tb.astensor(1.0)) == tb.shape(tb.astensor([1.0]))


def test_pdf_calculations(backend):
Expand Down

0 comments on commit 1f6e3b0

Please sign in to comment.