import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the mouse viral study data: two medicine dosages (mL) plus a
# binary 'Virus Present' label per mouse.
df = pd.read_csv("../DATA/mouse_viral_study.csv")
df.head()
Med_1_mL | Med_2_mL | Virus Present | |
---|---|---|---|
0 | 6.508231 | 8.582531 | 0 |
1 | 4.126116 | 3.073459 | 1 |
2 | 6.427870 | 6.369758 | 0 |
3 | 3.672953 | 4.905215 | 1 |
4 | 1.580321 | 2.440562 | 1 |
sns.scatterplot(x='Med_1_mL',y='Med_2_mL',data=df,hue='Virus Present')
<AxesSubplot:xlabel='Med_1_mL', ylabel='Med_2_mL'>
# Feature matrix / target vector for classification.
# NOTE(review): X stays a DataFrame, so the SVC records feature names at fit
# time; plot_svm_boundary later predicts on raw arrays, which triggers the
# (benign) "X does not have valid feature names" UserWarning seen below each
# plot call. Harmless here, but could be silenced by fitting on X.values —
# confirm plot_svm_boundary accepts arrays before changing.
X=df.drop('Virus Present',axis=1)
y=df['Virus Present']
from sklearn.svm import SVC
# Linear-kernel SVM; a small C gives a wide, soft margin (more regularization).
model=SVC(kernel='linear',C=0.1)
model.fit(X,y)
SVC(C=0.1, kernel='linear')
# Course-supplied helper: draws the decision boundary, margins and support
# vectors of a fitted 2-feature SVC.
from svm_margin_plot import plot_svm_boundary
plot_svm_boundary(model,X,y)
C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names "X does not have valid feature names, but"
# The RBF kernel implicitly projects the data into a higher-dimensional space;
# C=1000 gives a hard (barely regularized) margin.
model=SVC(kernel='rbf',C=1000)
model.fit(X,y)
plot_svm_boundary(model,X,y)
C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names "X does not have valid feature names, but"
# Raising the polynomial degree won't make a difference here: a linear
# boundary already captures all the information needed for this data set.
model=SVC(kernel='poly',C=1000)
model.fit(X,y)
plot_svm_boundary(model,X,y)
C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names "X does not have valid feature names, but"
from sklearn.model_selection import GridSearchCV

# Cross-validated search over regularization strength and kernel type
# (default 5-fold CV; dict key order does not affect the search).
base_svc = SVC()
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.01, 0.1, 1],
}
model = GridSearchCV(base_svc, param_grid)
model.fit(X, y)
GridSearchCV(estimator=SVC(), param_grid={'C': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf']})
model.best_params_
{'C': 0.01, 'kernel': 'linear'}
# --- Regression task: concrete slump data set ---
df = pd.read_csv('../DATA/cement_slump.csv')
df.head()
Cement | Slag | Fly ash | Water | SP | Coarse Aggr. | Fine Aggr. | SLUMP(cm) | FLOW(cm) | Compressive Strength (28-day)(Mpa) | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 273.0 | 82.0 | 105.0 | 210.0 | 9.0 | 904.0 | 680.0 | 23.0 | 62.0 | 34.99 |
1 | 163.0 | 149.0 | 191.0 | 180.0 | 12.0 | 843.0 | 746.0 | 0.0 | 20.0 | 41.14 |
2 | 162.0 | 148.0 | 191.0 | 179.0 | 16.0 | 840.0 | 743.0 | 1.0 | 20.0 | 41.81 |
3 | 162.0 | 148.0 | 190.0 | 179.0 | 19.0 | 838.0 | 741.0 | 3.0 | 21.5 | 42.08 |
4 | 154.0 | 112.0 | 144.0 | 220.0 | 10.0 | 923.0 | 658.0 | 20.0 | 64.0 | 26.82 |
df.columns
Index(['Cement', 'Slag', 'Fly ash', 'Water', 'SP', 'Coarse Aggr.', 'Fine Aggr.', 'SLUMP(cm)', 'FLOW(cm)', 'Compressive Strength (28-day)(Mpa)'], dtype='object')
# Predict 28-day compressive strength from the remaining mix-design features.
X=df.drop('Compressive Strength (28-day)(Mpa)',axis=1)
y=df['Compressive Strength (28-day)(Mpa)']
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=101)
from sklearn.preprocessing import StandardScaler
# SVMs are distance-based, so feature scaling matters. Fit the scaler on the
# training split only (avoids data leakage), then apply the same learned
# transform to the test split.
scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)
from sklearn.svm import SVR,LinearSVR
# Baseline: SVR with all-default hyper-parameters, to compare against the
# grid-searched model below.
base_model=SVR()
base_model.fit(x_train,y_train)
SVR()
pred=base_model.predict(x_test)
from sklearn.metrics import mean_squared_error , mean_absolute_error
# Baseline RMSE on the held-out test set (~6.70 below).
np.sqrt(mean_squared_error(y_test,pred))
6.695914838327133
# Hyper-parameter search space for the SVR.
# NOTE: 'degree' only matters for the 'poly' kernel, so the grid contains
# some redundant combinations — harmless, just extra fits.
param_grid = {
    'kernel': ['linear', 'rbf', 'poly'],
    'C': [0.001, 0.01, 0.1, 0.5, 1],
    'gamma': ['scale', 'auto'],
    'degree': [2, 3, 4],
    'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
}

from sklearn.model_selection import GridSearchCV

# Exhaustive cross-validated search over the grid above; refits the best
# estimator on the full training split when done.
estimator = SVR()
grid_model = GridSearchCV(estimator, param_grid=param_grid)
grid_model.fit(x_train, y_train)
GridSearchCV(estimator=SVR(), param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1], 'degree': [2, 3, 4], 'epsilon': [0, 0.01, 0.1, 0.5, 1, 2], 'gamma': ['scale', 'auto'], 'kernel': ['linear', 'rbf', 'poly']})
# Predict with the refit best estimator and inspect the winning parameters.
pred=grid_model.predict(x_test)
grid_model.best_params_
{'C': 1, 'degree': 2, 'epsilon': 2, 'gamma': 'scale', 'kernel': 'linear'}
np.sqrt(mean_squared_error(y_test,pred))
3.178210305119839