-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a2adaf0
commit b59c07a
Showing
6 changed files
with
2,591 additions
and
1,788 deletions.
There are no files selected for viewing
1,025 changes: 67 additions & 958 deletions
1,025
Jupyter_Notebooks/.ipynb_checkpoints/pm3-notebook-newdata-checkpoint.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
210 changes: 210 additions & 0 deletions
210
Jupyter_Notebooks/.ipynb_checkpoints/python-notebook-linear-penalized-regs-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"papermill": { | ||
"duration": 0.012019, | ||
"end_time": "2021-02-15T11:01:41.761156", | ||
"exception": false, | ||
"start_time": "2021-02-15T11:01:41.749137", | ||
"status": "completed" | ||
}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"\n", | ||
"\n", | ||
"This notebook contains an example for teaching.\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"_execution_state": "idle", | ||
"_uuid": "051d70d956493feee0c6d64651c6a088724dca2a", | ||
"papermill": { | ||
"duration": 0.010774, | ||
"end_time": "2021-02-15T11:01:41.782833", | ||
"exception": false, | ||
"start_time": "2021-02-15T11:01:41.772059", | ||
"status": "completed" | ||
}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"# Penalized Linear Regressions: A Simulation Experiment" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"papermill": { | ||
"duration": 0.010616, | ||
"end_time": "2021-02-15T11:01:41.804126", | ||
"exception": false, | ||
"start_time": "2021-02-15T11:01:41.793510", | ||
"status": "completed" | ||
}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"## Data Generating Process: Approximately Sparse" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 67, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import random\n", | ||
"random.seed(1)\n", | ||
"import numpy as np\n", | ||
"import math\n", | ||
"import matplotlib.pyplot as plt" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"n = 100\n", | ||
"p = 400\n", | ||
"\n", | ||
"Z = np.random.uniform( low = 0 , high = 1 , size = n) - 1/2 \n", | ||
"\n", | ||
"W = ( np.random.uniform( low = 0 , high = 1 , size = n * p ) - 1/2 ).\\\n", | ||
" reshape( n , p )\n", | ||
"\n", | ||
"beta = ((1/ np.arange(1, p + 1 )) ** 2)\n", | ||
"gX = np.exp( 4 * Z ) + (W @ beta)\n", | ||
"X = np.concatenate( ( Z.reshape(Z.size, 1), Z.reshape(Z.size, 1) \\\n", | ||
" ** 2, Z.reshape(Z.size, 1) ** 3, W ) , axis = 1 )\n", | ||
"\n", | ||
"mean = 0\n", | ||
"sd = 1\n", | ||
"Y = gX + np.random.normal( mean , sd, n )\n", | ||
"\n", | ||
"We use package Glmnet to carry out predictions using cross-validated lasso, ridge, and elastic netfig = plt.figure()\n", | ||
"fig.suptitle('Y vs g(X)')\n", | ||
"ax = fig.add_subplot(111)\n", | ||
"plt.scatter( Y, gX)\n", | ||
"plt.xlabel('g(X)')\n", | ||
"plt.ylabel('Y')\n", | ||
"plt.show()\n", | ||
"\n", | ||
"print( f\"theoretical R2:, {np.var(gX) / np.var( Y )}\" ) \n", | ||
"\n", | ||
"np.var(gX) / np.var( Y ) #theoretical R-square in the simulation example" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"papermill": { | ||
"duration": 0.013571, | ||
"end_time": "2021-02-15T11:01:42.446308", | ||
"exception": false, | ||
"start_time": "2021-02-15T11:01:42.432737", | ||
"status": "completed" | ||
}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"We use package Glmnet to carry out predictions using cross-validated lasso, ridge, and elastic net" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 90, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import glmnet_python\n", | ||
"from glmnet import glmnet" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 96, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import sys\n", | ||
"sys.path.append('../test')\n", | ||
"sys.path.append('../lib')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 93, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import scipy, importlib, pprint, matplotlib.pyplot as plt, warnings\n", | ||
"from glmnet import glmnet; from glmnetPlot import glmnetPlot\n", | ||
"from glmnetPrint import glmnetPrint; from glmnetCoef import glmnetCoef; from glmnetPredict import glmnetPredict\n", | ||
"from cvglmnet import cvglmnet; from cvglmnetCoef import cvglmnetCoef\n", | ||
"from cvglmnetPlot import cvglmnetPlot; from cvglmnetPredict import cvglmnetPredict" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 97, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "ValueError", | ||
"evalue": "loadGlmlib does not currently work for windows", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", | ||
"\u001b[1;32m<ipython-input-97-f4c9fa417bb8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m fit = glmnet(x = X, y = Y, family = 'gaussian', \\\n\u001b[0m\u001b[0;32m 2\u001b[0m alpha = 0.1 )\n", | ||
"\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\glmnet.py\u001b[0m in \u001b[0;36mglmnet\u001b[1;34m(x, y, family, **options)\u001b[0m\n\u001b[0;32m 446\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mfamily\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'gaussian'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[1;31m# call elnet\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 448\u001b[1;33m fit = elnet(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, \n\u001b[0m\u001b[0;32m 449\u001b[0m \u001b[0mlempty\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnvars\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mjd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mne\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnlam\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflmin\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mulam\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 450\u001b[0m thresh, isd, intr, maxit, family)\n", | ||
"\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\elnet.py\u001b[0m in \u001b[0;36melnet\u001b[1;34m(x, is_sparse, irs, pcs, y, weights, offset, gtype, parm, lempty, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, intr, maxit, family)\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;31m# load shared fortran library\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m \u001b[0mglmlib\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mloadGlmLib\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[1;31m# pre-process data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | ||
"\u001b[1;32mc:\\python\\python38\\lib\\site-packages\\glmnet_python\\loadGlmLib.py\u001b[0m in \u001b[0;36mloadGlmLib\u001b[1;34m()\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'nt'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[1;31m# this does not currently work\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'loadGlmlib does not currently work for windows'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 29\u001b[0m \u001b[1;31m# glmlib = ctypes.windll.LoadLibrary(glmnet_dll)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 30\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | ||
"\u001b[1;31mValueError\u001b[0m: loadGlmlib does not currently work for windows" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"fit = glmnet(x = X, y = Y, family = 'gaussian', \\\n", | ||
" alpha = 0.1 )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Problems with loadGlmlib does not currently work for windows. According to this [link](https://web.stanford.edu/~hastie/glmnet_python/), this library is currently supported in Linux. \\\n", | ||
"Here is some explanation about how to proceede like cv.glamee" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"hide_input": false, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Oops, something went wrong.