Skip to content

Commit

Permalink
#16
Browse files Browse the repository at this point in the history
  • Loading branch information
anzonyquispe committed Mar 22, 2021
1 parent a2adaf0 commit b59c07a
Show file tree
Hide file tree
Showing 6 changed files with 2,591 additions and 1,788 deletions.
1,025 changes: 67 additions & 958 deletions Jupyter_Notebooks/.ipynb_checkpoints/pm3-notebook-newdata-checkpoint.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.012019,
"end_time": "2021-02-15T11:01:41.761156",
"exception": false,
"start_time": "2021-02-15T11:01:41.749137",
"status": "completed"
},
"tags": []
},
"source": [
"\n",
"\n",
"This notebook contains an example for teaching.\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"_execution_state": "idle",
"_uuid": "051d70d956493feee0c6d64651c6a088724dca2a",
"papermill": {
"duration": 0.010774,
"end_time": "2021-02-15T11:01:41.782833",
"exception": false,
"start_time": "2021-02-15T11:01:41.772059",
"status": "completed"
},
"tags": []
},
"source": [
"# Penalized Linear Regressions: A Simulation Experiment"
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.010616,
"end_time": "2021-02-15T11:01:41.804126",
"exception": false,
"start_time": "2021-02-15T11:01:41.793510",
"status": "completed"
},
"tags": []
},
"source": [
"## Data Generating Process: Approximately Sparse"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"random.seed(1)\n",
"import numpy as np\n",
"import math\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n = 100\n",
"p = 400\n",
"\n",
"Z = np.random.uniform( low = 0 , high = 1 , size = n) - 1/2 \n",
"\n",
"W = ( np.random.uniform( low = 0 , high = 1 , size = n * p ) - 1/2 ).\\\n",
" reshape( n , p )\n",
"\n",
"beta = ((1/ np.arange(1, p + 1 )) ** 2)\n",
"gX = np.exp( 4 * Z ) + (W @ beta)\n",
"X = np.concatenate( ( Z.reshape(Z.size, 1), Z.reshape(Z.size, 1) \\\n",
" ** 2, Z.reshape(Z.size, 1) ** 3, W ) , axis = 1 )\n",
"\n",
"mean = 0\n",
"sd = 1\n",
"Y = gX + np.random.normal( mean , sd, n )\n",
"\n",
"We use package Glmnet to carry out predictions using cross-validated lasso, ridge, and elastic netfig = plt.figure()\n",
"fig.suptitle('Y vs g(X)')\n",
"ax = fig.add_subplot(111)\n",
"plt.scatter( Y, gX)\n",
"plt.xlabel('g(X)')\n",
"plt.ylabel('Y')\n",
"plt.show()\n",
"\n",
"print( f\"theoretical R2:, {np.var(gX) / np.var( Y )}\" ) \n",
"\n",
"np.var(gX) / np.var( Y ) #theoretical R-square in the simulation example"
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.013571,
"end_time": "2021-02-15T11:01:42.446308",
"exception": false,
"start_time": "2021-02-15T11:01:42.432737",
"status": "completed"
},
"tags": []
},
"source": [
"We use package Glmnet to carry out predictions using cross-validated lasso, ridge, and elastic net"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"import glmnet_python\n",
"from glmnet import glmnet"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('../test')\n",
"sys.path.append('../lib')"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"import scipy, importlib, pprint, matplotlib.pyplot as plt, warnings\n",
"from glmnet import glmnet; from glmnetPlot import glmnetPlot\n",
"from glmnetPrint import glmnetPrint; from glmnetCoef import glmnetCoef; from glmnetPredict import glmnetPredict\n",
"from cvglmnet import cvglmnet; from cvglmnetCoef import cvglmnetCoef\n",
"from cvglmnetPlot import cvglmnetPlot; from cvglmnetPredict import cvglmnetPredict"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "loadGlmlib does not currently work for windows",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-97-f4c9fa417bb8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m fit = glmnet(x = X, y = Y, family = 'gaussian', \\\n\u001b[0m\u001b[0;32m 2\u001b[0m alpha = 0.1 )\n",
"\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\glmnet.py\u001b[0m in \u001b[0;36mglmnet\u001b[1;34m(x, y, family, **options)\u001b[0m\n\u001b[0;32m 446\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mfamily\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'gaussian'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[1;31m# call elnet\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 448\u001b[1;33m fit = elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, \n\u001b[0m\u001b[0;32m 449\u001b[0m \u001b[0mlempty\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnvars\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mjd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mne\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnlam\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflmin\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mulam\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 450\u001b[0m thresh, isd, intr, maxit, family)\n",
"\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\elnet.py\u001b[0m in \u001b[0;36melnet\u001b[1;34m(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, intr, maxit, family)\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;31m# load shared fortran library\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m \u001b[0mglmlib\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mloadGlmLib\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[1;31m# pre-process data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\loadGlmLib.py\u001b[0m in \u001b[0;36mloadGlmLib\u001b[1;34m()\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'nt'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[1;31m# this does not currently work\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'loadGlmlib does not currently work for windows'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 29\u001b[0m \u001b[1;31m# glmlib = ctypes.windll.LoadLibrary(glmnet_dll)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 30\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: loadGlmlib does not currently work for windows"
]
}
],
"source": [
"fit = glmnet(x = X, y = Y, family = 'gaussian', \\\n",
" alpha = 0.1 )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Problems with loadGlmlib does not currently work for windows. According to this [link](https://web.stanford.edu/~hastie/glmnet_python/), this library is currently supported in Linux. \\\n",
"Here is some explanation about how to proceede like cv.glamee"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading

0 comments on commit b59c07a

Please sign in to comment.