In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

1-SVM as a classification task¶

In [2]:
# Load the mouse viral study data set (path is relative to the notebook's working directory).
df = pd.read_csv("../DATA/mouse_viral_study.csv")
In [3]:
# Preview the first rows: two dose columns and the 'Virus Present' label.
df.head()
Out[3]:
Med_1_mL Med_2_mL Virus Present
0 6.508231 8.582531 0
1 4.126116 3.073459 1
2 6.427870 6.369758 0
3 3.672953 4.905215 1
4 1.580321 2.440562 1
In [4]:
# Plot the two medicine doses against each other, coloured by the 'Virus Present' label.
sns.scatterplot(data=df, x='Med_1_mL', y='Med_2_mL', hue='Virus Present')
Out[4]:
<AxesSubplot:xlabel='Med_1_mL', ylabel='Med_2_mL'>
In [5]:
# Feature matrix: every column except the 'Virus Present' label.
X = df.drop(columns='Virus Present')
In [6]:
# Target vector: the 'Virus Present' label column.
y=df['Virus Present']
In [7]:
from sklearn.svm import SVC
In [8]:
# Linear-kernel support vector classifier with C=0.1.
model = SVC(C=0.1, kernel='linear')
In [9]:
# Fit the classifier on the full data set (no train/test split is made in this section).
model.fit(X,y)
Out[9]:
SVC(C=0.1, kernel='linear')
In [10]:
from svm_margin_plot import plot_svm_boundary
In [11]:
# Presumably draws the fitted model's decision boundary and margins (helper from the local svm_margin_plot module).
# NOTE(review): the UserWarning in the output suggests the helper calls the model with a plain
# array while the model was fitted on a DataFrame with column names — confirm in svm_margin_plot.
plot_svm_boundary(model,X,y)
C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names
  "X does not have valid feature names, but"
In [12]:
# RBF kernel with a large C (1000). The kernel separates the classes in a
# higher-dimensional feature space; the plot shows the resulting boundary
# projected back onto the two original features.
model = SVC(C=1000, kernel='rbf').fit(X, y)
plot_svm_boundary(model, X, y)
C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names
  "X does not have valid feature names, but"
In [13]:
# Polynomial kernel (no `degree` is passed, so scikit-learn's default degree is used).
# Raising the degree would not make a difference here: a linear boundary already
# provides all the information needed for this data set.

model = SVC(C=1000, kernel='poly').fit(X, y)
plot_svm_boundary(model, X, y)
C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names
  "X does not have valid feature names, but"
In [14]:
from sklearn.model_selection import GridSearchCV
In [15]:
# Unconfigured SVC used as the base estimator for the grid search below.
svc=SVC()
In [16]:
# Hyperparameter candidates for the classifier: 3 values of C x 2 kernels.
param_grid = {
    'C': [0.01, 0.1, 1],
    'kernel': ['linear', 'rbf'],
}
In [17]:
# Exhaustive search over param_grid with GridSearchCV's default cross-validation and scoring.
model = GridSearchCV(estimator=svc, param_grid=param_grid)
In [18]:
# Run the cross-validated search over every combination in param_grid.
model.fit(X,y)
Out[18]:
GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf']})
In [19]:
# Best combination found by the search.
# NOTE(review): the reported C=0.01 is the smallest value in the grid, so smaller C values may be worth trying.
model.best_params_
Out[19]:
{'C': 0.01, 'kernel': 'linear'}

2-SVM as a regression task¶

In [20]:
# Load the cement slump data set; this rebinds `df`, replacing the mouse study frame loaded earlier.
df = pd.read_csv('../DATA/cement_slump.csv')
In [21]:
# Preview the first rows of the cement data (mix components, slump/flow, and compressive strength).
df.head()
Out[21]:
Cement Slag Fly ash Water SP Coarse Aggr. Fine Aggr. SLUMP(cm) FLOW(cm) Compressive Strength (28-day)(Mpa)
0 273.0 82.0 105.0 210.0 9.0 904.0 680.0 23.0 62.0 34.99
1 163.0 149.0 191.0 180.0 12.0 843.0 746.0 0.0 20.0 41.14
2 162.0 148.0 191.0 179.0 16.0 840.0 743.0 1.0 20.0 41.81
3 162.0 148.0 190.0 179.0 19.0 838.0 741.0 3.0 21.5 42.08
4 154.0 112.0 144.0 220.0 10.0 923.0 658.0 20.0 64.0 26.82
In [22]:
# List the exact column labels; the target name below must match one of them character for character.
df.columns
Out[22]:
Index(['Cement', 'Slag', 'Fly ash', 'Water', 'SP', 'Coarse Aggr.',
       'Fine Aggr.', 'SLUMP(cm)', 'FLOW(cm)',
       'Compressive Strength (28-day)(Mpa)'],
      dtype='object')
In [23]:
# Feature matrix: every column except the compressive-strength target.
X = df.drop(columns='Compressive Strength (28-day)(Mpa)')
In [24]:
# Regression target: the 28-day compressive strength column.
y=df['Compressive Strength (28-day)(Mpa)']
In [25]:
from sklearn.model_selection import train_test_split
In [26]:
# Hold out 30% of the rows for testing; random_state=101 makes the split reproducible.
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=101)
In [27]:
from sklearn.preprocessing import StandardScaler
In [28]:
# Scaler with default settings (standardises each feature to zero mean and unit variance).
scaler=StandardScaler()
In [29]:
# Fit the scaler on the training split only, then apply the same transformation to the
# test split, so that no test-set statistics leak into training.
# Note: both names are rebound to the scaled versions (by default these are NumPy arrays, not DataFrames).
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)
In [30]:
from sklearn.svm import SVR,LinearSVR
In [31]:
# Baseline regressor: SVR with all default hyperparameters.
base_model=SVR()
In [32]:
# Train the baseline on the scaled training split.
base_model.fit(x_train,y_train)
Out[32]:
SVR()
In [33]:
# Baseline predictions for the scaled test split.
pred=base_model.predict(x_test)
In [34]:
from sklearn.metrics import mean_squared_error , mean_absolute_error
In [35]:
# Root mean squared error of the baseline on the held-out test split (same units as the target).
np.sqrt(mean_squared_error(y_test,pred))
Out[35]:
6.695914838327133

Using grid search to find a better model¶

In [40]:
# Hyperparameter candidates for the SVR search: 5 * 3 * 2 * 3 * 6 = 540 combinations.
# NOTE(review): per the scikit-learn docs, 'degree' only affects the 'poly' kernel and
# 'gamma' is unused by 'linear', so many of these combinations are redundant fits.
param_grid = {
    'C': [0.001, 0.01, 0.1, 0.5, 1],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto'],
    'degree': [2, 3, 4],
    'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
}
In [41]:
from sklearn.model_selection import GridSearchCV
In [42]:
# Cross-validated search over param_grid, using a default SVR as the base estimator.
svr = SVR()
grid_model = GridSearchCV(estimator=svr, param_grid=param_grid)
In [43]:
# Run the search on the scaled training split only; the test split stays untouched for the final evaluation.
grid_model.fit(x_train,y_train)
Out[43]:
GridSearchCV(estimator=SVR(),
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1], 'degree': [2, 3, 4],
                         'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']})
In [44]:
# Predict with the search's best estimator (GridSearchCV refits it on the training data by default).
# This rebinds `pred`, replacing the baseline predictions.
pred=grid_model.predict(x_test)
In [47]:
# Best combination found by the search.
# NOTE(review): this cell's execution count (47) is out of sequence with its neighbours (44, 45);
# re-run the notebook top to bottom before trusting the stored outputs.
# NOTE(review): the reported C=1 and epsilon=2 are the largest values in the grid, so larger values may be worth trying.
grid_model.best_params_
Out[47]:
{'C': 1, 'degree': 2, 'epsilon': 2, 'gamma': 'scale', 'kernel': 'linear'}
In [45]:
# Root mean squared error of the tuned model on the held-out test split (compare with the baseline RMSE above).
np.sqrt(mean_squared_error(y_test,pred))
Out[45]:
3.178210305119839