harunurrashid97
diff --git a/‎Lasso Regression with python and R/Lasso Regression in R1.png‎
96.5 KB b/‎Lasso Regression with python and R/Lasso Regression in R1.png‎
96.5 KB
diff --git a/‎Lasso Regression with python and R/Lasso Regression in R2.png‎
94.3 KB b/‎Lasso Regression with python and R/Lasso Regression in R2.png‎
94.3 KB
diff --git a/‎Lasso Regression with python and R/Lasso Regression in R3.png‎
91.5 KB b/‎Lasso Regression with python and R/Lasso Regression in R3.png‎
91.5 KB
diff --git a/‎Lasso Regression with python and R/Lasso Regression in R4.png‎
100 KB b/‎Lasso Regression with python and R/Lasso Regression in R4.png‎
100 KB
diff --git a/‎Lasso Regression with python and R/Lasso Regression with python.ipynb‎
Lines changed: 162 additions & 0 deletions b/‎Lasso Regression with python and R/Lasso Regression with python.ipynb‎
Lines changed: 162 additions & 0 deletions
@@ -0,0 +1,162 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Lasso Regression with Python"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Load libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from scipy.stats import skew\n",
+    "from scipy.special import boxcox1p\n",
+    "from sklearn.feature_selection import RFECV\n",
+    "from sklearn.linear_model import Lasso\n",
+    "from sklearn.model_selection import cross_val_score"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train = pd.read_csv('train.csv')\n",
+    "test = pd.read_csv('test.csv')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Remove outliers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train = train[~((train['GrLivArea'] > 4000) & (train['SalePrice'] < 300000))].copy()  # explicit copy: SalePrice is reassigned on this frame later, which would otherwise write to a slice"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "all_data = pd.concat((train.loc[:,'MSSubClass':'SaleCondition'],\n",
+    "                      test.loc[:,'MSSubClass':'SaleCondition']))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Drop some features to avoid multicollinearity"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "all_data.drop(['1stFlrSF', 'GarageArea', 'TotRmsAbvGrd'], axis=1, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train[\"SalePrice\"] = np.log1p(train[\"SalePrice\"])\n",
+    "\n",
+    "numeric_feats = all_data.dtypes[all_data.dtypes != \"object\"].index\n",
+    "\n",
+    "skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna()))#compute skewness"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "skewed_feats = skewed_feats[skewed_feats > 0.65]\n",
+    "skewed_feats = skewed_feats.index\n",
+    "\n",
+    "all_data[skewed_feats] = boxcox1p(all_data[skewed_feats], 0.14)\n",
+    "\n",
+    "all_data = pd.get_dummies(all_data)\n",
+    "\n",
+    "all_data = all_data.fillna(all_data.mean())\n",
+    "\n",
+    "X_train = all_data[:train.shape[0]]\n",
+    "X_test = all_data[train.shape[0]:]\n",
+    "y = train.SalePrice"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}