Enforce consistent tensor shape for scalars (#413)

* Ensure non-empty tensor shape for consistency
  - Do this by asking for forgiveness: attempt to access the shape information and, if that fails, modify the tensor object so that it has the proper shape
* Remove the generic_len function from the backends, as a consistent tensor shape makes it unnecessary
* Add error logging and re-raise the KeyError if an unsupported dtype is passed to a tensorlib's astensor method
* Add tests for the tensor shape of scalars
* Add tests for bad tensor dtypes
scikit-hep · Mar 5, 2019 · 1f6e3b0 · 1f6e3b0
1 parent 4fa45dd
commit 1f6e3b0
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 45 deletions.
diff --git a/pyhf/tensor/mxnet_backend.py b/pyhf/tensor/mxnet_backend.py
@@ -105,9 +105,19 @@ def astensor(self, tensor_in, dtype='float'):
             MXNet NDArray: A multi-dimensional, fixed-size homogenous array.
         """
         dtypemap = {'float': 'float32', 'int': 'int32', 'bool': 'uint8'}
-        dtype = dtypemap[dtype]
+        try:
+            dtype = dtypemap[dtype]
+        except KeyError:
+            log.error('Invalid dtype: dtype must be float, int, or bool.')
+            raise
+
         try:
             tensor = nd.array(tensor_in, dtype=dtype)
+            # Ensure non-empty tensor shape for consistency
+            try:
+                tensor.shape[0]
+            except IndexError:
+                tensor = tensor.broadcast_to((1,))
         except ValueError:
             tensor = nd.array([tensor_in], dtype=dtype)
         return tensor

diff --git a/pyhf/tensor/numpy_backend.py b/pyhf/tensor/numpy_backend.py
@@ -67,8 +67,19 @@ def astensor(self, tensor_in, dtype='float'):
             `numpy.ndarray`: A multi-dimensional, fixed-size homogenous array.
         """
         dtypemap = {'float': np.float64, 'int': np.int64, 'bool': np.bool_}
-        dtype = dtypemap[dtype]
-        return np.asarray(tensor_in, dtype=dtype)
+        try:
+            dtype = dtypemap[dtype]
+        except KeyError:
+            log.error('Invalid dtype: dtype must be float, int, or bool.')
+            raise
+
+        tensor = np.asarray(tensor_in, dtype=dtype)
+        # Ensure non-empty tensor shape for consistency
+        try:
+            tensor.shape[0]
+        except IndexError:
+            tensor = tensor.reshape(1)
+        return tensor
 
     def sum(self, tensor_in, axis=None):
         return np.sum(tensor_in, axis=axis)

diff --git a/pyhf/tensor/pytorch_backend.py b/pyhf/tensor/pytorch_backend.py
@@ -58,8 +58,19 @@ def astensor(self, tensor_in, dtype='float'):
             torch.Tensor: A multi-dimensional matrix containing elements of a single data type.
         """
         dtypemap = {'float': torch.float, 'int': torch.int, 'bool': torch.uint8}
-        dtype = dtypemap[dtype]
-        return torch.as_tensor(tensor_in, dtype=dtype)
+        try:
+            dtype = dtypemap[dtype]
+        except KeyError:
+            log.error('Invalid dtype: dtype must be float, int, or bool.')
+            raise
+
+        tensor = torch.as_tensor(tensor_in, dtype=dtype)
+        # Ensure non-empty tensor shape for consistency
+        try:
+            tensor.shape[0]
+        except IndexError:
+            tensor = tensor.expand(1)
+        return tensor
 
     def gather(self, tensor, indices):
         return torch.take(tensor, indices.type(torch.LongTensor))
@@ -165,28 +176,17 @@ def simple_broadcast(self, *args):
             list of Tensors: The sequence broadcast together.
         """
 
-        def generic_len(a):
-            try:
-                return len(a)
-            except TypeError:
-                if len(a.shape) < 1:
-                    return 0
-                else:
-                    return a.shape[0]
-
         args = [self.astensor(arg) for arg in args]
-        max_dim = max(map(generic_len, args))
+        max_dim = max(map(len, args))
         try:
-            assert len([arg for arg in args if 1 < generic_len(arg) < max_dim]) == 0
+            assert len([arg for arg in args if 1 < len(arg) < max_dim]) == 0
         except AssertionError as error:
             log.error(
                 'ERROR: The arguments must be of compatible size: 1 or %i', max_dim
             )
             raise error
 
-        broadcast = [
-            arg if generic_len(arg) > 1 else arg.expand(max_dim) for arg in args
-        ]
+        broadcast = [arg if len(arg) > 1 else arg.expand(max_dim) for arg in args]
         return broadcast
 
     def einsum(self, subscripts, *operands):
@@ -222,7 +222,7 @@ def poisson(self, n, lam):
             >>> pyhf.tensorlib.poisson([5.], [6.])
             tensor([0.1606])
             >>> pyhf.tensorlib.poisson(5., 6.)
-            tensor(0.1606)
+            tensor([0.1606])
 
         Args:
             n (`tensor` or `float`): The value at which to evaluate the approximation to the Poisson distribution p.m.f.
@@ -257,7 +257,7 @@ def normal(self, x, mu, sigma):
             >>> pyhf.tensorlib.normal([0.5], [0.], [1.])
             tensor([0.3521])
             >>> pyhf.tensorlib.normal(0.5, 0., 1.)
-            tensor(0.3521)
+            tensor([0.3521])
 
         Args:
             x (`tensor` or `float`): The value at which to evaluate the Normal distribution p.d.f.

diff --git a/pyhf/tensor/tensorflow_backend.py b/pyhf/tensor/tensorflow_backend.py
@@ -105,17 +105,26 @@ def astensor(self, tensor_in, dtype='float'):
             `tf.Tensor`: A symbolic handle to one of the outputs of a `tf.Operation`.
         """
         dtypemap = {'float': tf.float32, 'int': tf.int32, 'bool': tf.bool}
-        dtype = dtypemap[dtype]
-
-        if isinstance(tensor_in, tf.Tensor):
-            v = tensor_in
-        else:
-            if isinstance(tensor_in, (int, float)):
-                tensor_in = [tensor_in]
-            v = tf.convert_to_tensor(tensor_in)
-        if v.dtype is not dtype:
-            v = tf.cast(v, dtype)
-        return v
+        try:
+            dtype = dtypemap[dtype]
+        except KeyError:
+            log.error('Invalid dtype: dtype must be float, int, or bool.')
+            raise
+
+        tensor = tensor_in
+        # If already a tensor then done
+        try:
+            tensor.op
+        except AttributeError:
+            tensor = tf.convert_to_tensor(tensor_in)
+            # Ensure non-empty tensor shape for consistency
+            try:
+                tensor.shape[0]
+            except IndexError:
+                tensor = tf.reshape(tensor, [1])
+        if tensor.dtype is not dtype:
+            tensor = tf.cast(tensor, dtype)
+        return tensor
 
     def sum(self, tensor_in, axis=None):
         tensor_in = self.astensor(tensor_in)
@@ -215,20 +224,10 @@ def simple_broadcast(self, *args):
         Returns:
             list of Tensors: The sequence broadcast together.
         """
-
-        def generic_len(a):
-            try:
-                return len(a)
-            except TypeError:
-                if len(a.shape) < 1:
-                    return 0
-                else:
-                    return a.shape[0]
-
         args = [self.astensor(arg) for arg in args]
-        max_dim = max(map(generic_len, args))
+        max_dim = max(map(lambda arg: arg.shape[0], args))
         try:
-            assert len([arg for arg in args if 1 < generic_len(arg) < max_dim]) == 0
+            assert len([arg for arg in args if 1 < arg.shape[0] < max_dim]) == 0
         except AssertionError as error:
             log.error(
                 'ERROR: The arguments must be of compatible size: 1 or %i', max_dim
@@ -237,7 +236,7 @@ def generic_len(a):
 
         broadcast = [
             arg
-            if generic_len(arg) > 1
+            if arg.shape[0] > 1
             else tf.tile(tf.slice(arg, [0], [1]), tf.stack([max_dim]))
             for arg in args
         ]

diff --git a/tests/test_tensor.py b/tests/test_tensor.py
@@ -1,9 +1,18 @@
 import pytest
+import logging
 import numpy as np
 import pyhf
 from pyhf.simplemodels import hepdata_like
 
 
+def test_astensor_dtype(backend, caplog):
+    tb = pyhf.tensorlib
+    with caplog.at_level(logging.INFO, 'pyhf.tensor'):
+        with pytest.raises(KeyError):  # 'long' is not a key of the backends' dtypemap
+            tb.astensor([1, 2, 3], dtype='long')
+        assert 'Invalid dtype' in caplog.text  # outside pytest.raises so it actually runs
+
+
 def test_simple_tensor_ops(backend):
     tb = pyhf.tensorlib
     assert tb.tolist(tb.sum([[1, 2, 3], [4, 5, 6]], axis=0)) == [5, 7, 9]
@@ -84,6 +93,9 @@ def test_reshape(backend):
 def test_shape(backend):
     tb = pyhf.tensorlib
     assert tb.shape(tb.ones((1, 2, 3, 4, 5))) == (1, 2, 3, 4, 5)
+    assert tb.shape(tb.astensor([])) == (0,)
+    assert tb.shape(tb.astensor([1.0])) == (1,)
+    assert tb.shape(tb.astensor(1.0)) == tb.shape(tb.astensor([1.0]))
 
 
 def test_pdf_calculations(backend):