Skip to content

Commit d9ff715

Browse files
author
Shimanto
authored
Add files via upload
1 parent cc0500a commit d9ff715

13 files changed

Lines changed: 6526 additions & 0 deletions
96.5 KB
Loading
94.3 KB
Loading
91.5 KB
Loading
100 KB
Loading
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Lasso Regression with Python"
8+
]
9+
},
10+
{
11+
"cell_type": "markdown",
12+
"metadata": {},
13+
"source": [
14+
"## Load libraries"
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": 1,
20+
"metadata": {
21+
"collapsed": true
22+
},
23+
"outputs": [],
24+
"source": [
25+
"import matplotlib.pyplot as plt\n",
26+
"import numpy as np\n",
27+
"import pandas as pd\n",
28+
"\n",
29+
"from scipy.stats import skew\n",
30+
"from scipy.special import boxcox1p\n",
31+
"from sklearn.feature_selection import RFECV\n",
32+
"from sklearn.linear_model import Lasso\n",
33+
"from sklearn.model_selection import cross_val_score"
34+
]
35+
},
36+
{
37+
"cell_type": "markdown",
38+
"metadata": {},
39+
"source": [
40+
"## Load dataset"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": 2,
46+
"metadata": {
47+
"collapsed": true
48+
},
49+
"outputs": [],
50+
"source": [
51+
"train = pd.read_csv('train.csv')\n",
52+
"test = pd.read_csv('test.csv')"
53+
]
54+
},
55+
{
56+
"cell_type": "markdown",
57+
"metadata": {},
58+
"source": [
59+
"## Remove outliers"
60+
]
61+
},
62+
{
63+
"cell_type": "code",
64+
"execution_count": 3,
65+
"metadata": {
66+
"collapsed": true
67+
},
68+
"outputs": [],
69+
"source": [
70+
"train = train[~((train['GrLivArea'] > 4000) & (train['SalePrice'] < 300000))]"
71+
]
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": 4,
76+
"metadata": {
77+
"collapsed": true
78+
},
79+
"outputs": [],
80+
"source": [
81+
"all_data = pd.concat((train.loc[:,'MSSubClass':'SaleCondition'],\n",
82+
" test.loc[:,'MSSubClass':'SaleCondition']))"
83+
]
84+
},
85+
{
86+
"cell_type": "markdown",
87+
"metadata": {},
88+
"source": [
89+
"## Drop some features to avoid multicollinearity"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": 5,
95+
"metadata": {
96+
"collapsed": true
97+
},
98+
"outputs": [],
99+
"source": [
100+
"all_data.drop(['1stFlrSF', 'GarageArea', 'TotRmsAbvGrd'], axis=1, inplace=True)"
101+
]
102+
},
103+
{
104+
"cell_type": "code",
105+
"execution_count": 6,
106+
"metadata": {
107+
"collapsed": true
108+
},
109+
"outputs": [],
110+
"source": [
111+
"train[\"SalePrice\"] = np.log1p(train[\"SalePrice\"])\n",
112+
"\n",
113+
"numeric_feats = all_data.dtypes[all_data.dtypes != \"object\"].index\n",
114+
"\n",
115+
"skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna()))#compute skewness"
116+
]
117+
},
118+
{
119+
"cell_type": "code",
120+
"execution_count": null,
121+
"metadata": {
122+
"collapsed": true
123+
},
124+
"outputs": [],
125+
"source": [
126+
"skewed_feats = skewed_feats[skewed_feats > 0.65]\n",
127+
"skewed_feats = skewed_feats.index\n",
128+
"\n",
129+
"all_data[skewed_feats] = boxcox1p(all_data[skewed_feats], 0.14)\n",
130+
"\n",
131+
"all_data = pd.get_dummies(all_data)\n",
132+
"\n",
133+
"all_data = all_data.fillna(all_data.mean())\n",
134+
"\n",
135+
"X_train = all_data[:train.shape[0]]\n",
136+
"X_test = all_data[train.shape[0]:]\n",
137+
"y = train.SalePrice"
138+
]
139+
}
140+
],
141+
"metadata": {
142+
"kernelspec": {
143+
"display_name": "Python 3",
144+
"language": "python",
145+
"name": "python3"
146+
},
147+
"language_info": {
148+
"codemirror_mode": {
149+
"name": "ipython",
150+
"version": 3
151+
},
152+
"file_extension": ".py",
153+
"mimetype": "text/x-python",
154+
"name": "python",
155+
"nbconvert_exporter": "python",
156+
"pygments_lexer": "ipython3",
157+
"version": "3.6.3"
158+
}
159+
},
160+
"nbformat": 4,
161+
"nbformat_minor": 2
162+
}

0 commit comments

Comments
 (0)