#16

alexanderquispe · Mar 22, 2021 · b59c07a · b59c07a
1 parent a2adaf0
commit b59c07a
Show file tree

Hide file tree

Showing 6 changed files with 2,591 additions and 1,788 deletions.
diff --git a/Jupyter_Notebooks/.ipynb_checkpoints/pm3-notebook-newdata-checkpoint.ipynb b/Jupyter_Notebooks/.ipynb_checkpoints/pm3-notebook-newdata-checkpoint.ipynb
diff --git a/Jupyter_Notebooks/.ipynb_checkpoints/python-notebook-linear-penalized-regs-checkpoint.ipynb b/Jupyter_Notebooks/.ipynb_checkpoints/python-notebook-linear-penalized-regs-checkpoint.ipynb
@@ -0,0 +1,210 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "papermill": {
+     "duration": 0.012019,
+     "end_time": "2021-02-15T11:01:41.761156",
+     "exception": false,
+     "start_time": "2021-02-15T11:01:41.749137",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "\n",
+    "\n",
+    "This notebook contains an example for teaching.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "_execution_state": "idle",
+    "_uuid": "051d70d956493feee0c6d64651c6a088724dca2a",
+    "papermill": {
+     "duration": 0.010774,
+     "end_time": "2021-02-15T11:01:41.782833",
+     "exception": false,
+     "start_time": "2021-02-15T11:01:41.772059",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# Penalized Linear Regressions: A Simulation Experiment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "papermill": {
+     "duration": 0.010616,
+     "end_time": "2021-02-15T11:01:41.804126",
+     "exception": false,
+     "start_time": "2021-02-15T11:01:41.793510",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Data Generating Process: Approximately Sparse"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import random\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "\n",
+    "# The simulation draws from NumPy's global generator, so NumPy has to be\n",
+    "# seeded too; seeding only the stdlib `random` module leaves those draws\n",
+    "# different on every run.\n",
+    "random.seed(1)\n",
+    "np.random.seed(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Simulation design: n observations and p regressors in W.\n",
+    "n = 100\n",
+    "p = 400\n",
+    "\n",
+    "# Z and every entry of W are uniform draws on [0, 1) shifted down by 1/2.\n",
+    "Z = np.random.uniform( low = 0 , high = 1 , size = n ) - 1/2\n",
+    "\n",
+    "W = ( np.random.uniform( low = 0 , high = 1 , size = n * p ) - 1/2 ).reshape( n , p )\n",
+    "\n",
+    "# Coefficients decay as 1 / j^2 (the approximately sparse design).\n",
+    "beta = ( 1 / np.arange( 1 , p + 1 ) ) ** 2\n",
+    "gX = np.exp( 4 * Z ) + ( W @ beta )\n",
+    "\n",
+    "# Design matrix: Z, Z^2 and Z^3 as columns, followed by the columns of W.\n",
+    "Z_col = Z.reshape( Z.size , 1 )\n",
+    "X = np.concatenate( ( Z_col , Z_col ** 2 , Z_col ** 3 , W ) , axis = 1 )\n",
+    "\n",
+    "mean = 0\n",
+    "sd = 1\n",
+    "Y = gX + np.random.normal( mean , sd , n )\n",
+    "\n",
+    "fig = plt.figure()\n",
+    "fig.suptitle('Y vs g(X)')\n",
+    "ax = fig.add_subplot(111)\n",
+    "# g(X) is plotted on the horizontal axis so the points match the axis labels.\n",
+    "ax.scatter( gX , Y )\n",
+    "ax.set_xlabel('g(X)')\n",
+    "ax.set_ylabel('Y')\n",
+    "plt.show()\n",
+    "\n",
+    "print( f\"theoretical R2:, {np.var(gX) / np.var( Y )}\" ) \n",
+    "\n",
+    "np.var(gX) / np.var( Y ) #theoretical R-square in the simulation example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "papermill": {
+     "duration": 0.013571,
+     "end_time": "2021-02-15T11:01:42.446308",
+     "exception": false,
+     "start_time": "2021-02-15T11:01:42.432737",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "We use package Glmnet to carry out predictions using cross-validated lasso, ridge, and elastic net"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import glmnet_python\n",
+    "from glmnet import glmnet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "\n",
+    "# Put the sibling ../test and ../lib directories on the import search path.\n",
+    "sys.path.extend( [ '../test' , '../lib' ] )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import scipy, importlib, pprint, matplotlib.pyplot as plt, warnings\n",
+    "from glmnet import glmnet; from glmnetPlot import glmnetPlot\n",
+    "from glmnetPrint import glmnetPrint; from glmnetCoef import glmnetCoef; from glmnetPredict import glmnetPredict\n",
+    "from cvglmnet import cvglmnet; from cvglmnetCoef import cvglmnetCoef\n",
+    "from cvglmnetPlot import cvglmnetPlot; from cvglmnetPredict import cvglmnetPredict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "loadGlmlib does not currently work for windows",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-97-f4c9fa417bb8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m fit = glmnet(x = X, y = Y, family = 'gaussian', \\\n\u001b[0m\u001b[0;32m      2\u001b[0m                     alpha = 0.1 )\n",
+      "\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\glmnet.py\u001b[0m in \u001b[0;36mglmnet\u001b[1;34m(x, y, family, **options)\u001b[0m\n\u001b[0;32m    446\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mfamily\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'gaussian'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    447\u001b[0m         \u001b[1;31m# call elnet\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 448\u001b[1;33m         fit = elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, \n\u001b[0m\u001b[0;32m    449\u001b[0m                     \u001b[0mlempty\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnvars\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mjd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mne\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnlam\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflmin\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mulam\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    450\u001b[0m                     thresh, isd, intr, maxit, family)\n",
+      "\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\elnet.py\u001b[0m in \u001b[0;36melnet\u001b[1;34m(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, intr, maxit, family)\u001b[0m\n\u001b[0;32m     15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     16\u001b[0m     \u001b[1;31m# load shared fortran library\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m     \u001b[0mglmlib\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mloadGlmLib\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     19\u001b[0m     \u001b[1;31m# pre-process data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\loadGlmLib.py\u001b[0m in \u001b[0;36mloadGlmLib\u001b[1;34m()\u001b[0m\n\u001b[0;32m     26\u001b[0m     \u001b[1;32melif\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'nt'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     27\u001b[0m         \u001b[1;31m# this does not currently work\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'loadGlmlib does not currently work for windows'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     29\u001b[0m         \u001b[1;31m# glmlib = ctypes.windll.LoadLibrary(glmnet_dll)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     30\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mValueError\u001b[0m: loadGlmlib does not currently work for windows"
+     ]
+    }
+   ],
+   "source": [
+    "# Gaussian-family glmnet fit on the simulated X and Y with alpha = 0.1.\n",
+    "fit = glmnet( x = X , y = Y , family = 'gaussian' , alpha = 0.1 )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The fit above fails with `ValueError: loadGlmlib does not currently work for windows`. According to this [link](https://web.stanford.edu/~hastie/glmnet_python/), this library is currently supported on Linux. \\\n",
+    "Here is some explanation about how to proceed, for example with `cv.glmnet`."
+   ]
+  }
+ ],
+ "metadata": {
+  "hide_input": false,
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}