import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Load the mouse viral study measurements from the shared DATA folder.
df = pd.read_csv("../DATA/mouse_viral_study.csv")


# Preview the first rows to check what was read.
df.head()


# Scatter the two Med_*_mL columns against each other, coloured by the
# 'Virus Present' label.
sns.scatterplot(data=df, x='Med_1_mL', y='Med_2_mL', hue='Virus Present')

<AxesSubplot:xlabel='Med_1_mL', ylabel='Med_2_mL'>


# Feature matrix: every column except the class label.
X = df.drop(columns='Virus Present')


# Target vector: the class label column on its own.
y = df['Virus Present']


from sklearn.svm import SVC


# Linear-kernel support vector classifier with C set to 0.1.
model = SVC(C=0.1, kernel='linear')


# Train on the full feature matrix and label column (no hold-out split here).
model.fit(X, y)

SVC(C=0.1, kernel='linear')


from svm_margin_plot import plot_svm_boundary


# Visualise the fitted model with the project helper imported from svm_margin_plot.
# NOTE(review): the UserWarning recorded after this call suggests the helper
# predicts on arrays without column names while the model was fitted on a
# DataFrame - confirm inside svm_margin_plot.
plot_svm_boundary(model,X,y)

C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names
  "X does not have valid feature names, but"


# Projecting the higher (N-)dimensional decision boundary back into two dimensions
# Rebind `model` to an RBF-kernel classifier with a large C (1000),
# train it on the same data and draw the result with the plotting helper.
model = SVC(C=1000, kernel='rbf')
model.fit(X, y)
plot_svm_boundary(model, X, y)

C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names
  "X does not have valid feature names, but"


# Increasing the degree won't make a difference, as a linear boundary already provides us with all the information needed
# for this data set

# Rebind `model` to a polynomial-kernel classifier with C=1000. No degree is
# passed, so the library default applies. Train and plot as before.
model = SVC(C=1000, kernel='poly')
model.fit(X, y)
plot_svm_boundary(model, X, y)

C:\Users\Aas03\anaconda3\envs\hadeel_en\lib\site-packages\sklearn\base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names
  "X does not have valid feature names, but"


from sklearn.model_selection import GridSearchCV


# Base classifier built with no arguments; C and kernel are supplied by the
# parameter grid when it is wrapped in the grid search.
svc=SVC()


# Search space for the classifier: three C values crossed with two kernels.
param_grid = dict(
    C=[0.01, 0.1, 1],
    kernel=['linear', 'rbf'],
)


# `model` now refers to the grid search wrapper rather than a single SVC:
# it tries every combination in param_grid using svc as the base estimator.
model = GridSearchCV(estimator=svc, param_grid=param_grid)


# Run the search on the full data set.
model.fit(X, y)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf']})


# Show the parameter combination the grid search selected.
model.best_params_

{'C': 0.01, 'kernel': 'linear'}


# Switch to the regression example: `df` is rebound to the cement slump data.
df = pd.read_csv('../DATA/cement_slump.csv')


# Preview the first rows.
df.head()


# List the column names.
df.columns

Index(['Cement', 'Slag', 'Fly ash', 'Water', 'SP', 'Coarse Aggr.',
       'Fine Aggr.', 'SLUMP(cm)', 'FLOW(cm)',
       'Compressive Strength (28-day)(Mpa)'],
      dtype='object')


# Feature matrix: every column except the 28-day compressive strength.
X = df.drop(columns='Compressive Strength (28-day)(Mpa)')


# Regression target: the 28-day compressive strength column.
y = df['Compressive Strength (28-day)(Mpa)']


from sklearn.model_selection import train_test_split


# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)


from sklearn.preprocessing import StandardScaler


# Standardise the features. The scaler is fitted on the training rows only
# and the same fitted transform is then applied to both splits.
scaler = StandardScaler()


scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


from sklearn.svm import SVR,LinearSVR


# Baseline regressor: an SVR built with no arguments, i.e. the library defaults.
base_model=SVR()


# Fit the baseline on the standardised training features.
base_model.fit(x_train,y_train)

SVR()


# Baseline predictions for the held-out (scaled) test rows.
pred=base_model.predict(x_test)


from sklearn.metrics import mean_squared_error , mean_absolute_error


# Root mean squared error of the baseline predictions on the test split.
baseline_mse = mean_squared_error(y_test, pred)
np.sqrt(baseline_mse)

6.695914838327133


# Candidate SVR settings for the grid search; every combination is evaluated.
# NOTE(review): per the scikit-learn SVR docs, `degree` only affects the 'poly'
# kernel and `gamma` is unused by 'linear', so many of these combinations are
# redundant fits - confirm before trimming the grid.
param_grid = dict(
    C=[0.001, 0.01, 0.1, 0.5, 1],
    kernel=['linear', 'rbf', 'poly'],
    gamma=['scale', 'auto'],
    degree=[2, 3, 4],
    epsilon=[0, 0.01, 0.1, 0.5, 1, 2],
)


from sklearn.model_selection import GridSearchCV


# Wrap a default SVR in a grid search over param_grid.
svr = SVR()
grid_model = GridSearchCV(estimator=svr, param_grid=param_grid)


# Run the search on the standardised training split.
grid_model.fit(x_train, y_train)

GridSearchCV(estimator=SVR(),
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1], 'degree': [2, 3, 4],
                         'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']})


# Overwrite `pred` with the grid search's predictions for the test rows.
pred=grid_model.predict(x_test)


# Show the parameter combination the grid search selected.
grid_model.best_params_

{'C': 1, 'degree': 2, 'epsilon': 2, 'gamma': 'scale', 'kernel': 'linear'}


# Root mean squared error of the grid-searched model on the test split.
tuned_mse = mean_squared_error(y_test, pred)
np.sqrt(tuned_mse)

3.178210305119839

	Med_1_mL	Med_2_mL	Virus Present
0	6.508231	8.582531	0
1	4.126116	3.073459	1
2	6.427870	6.369758	0
3	3.672953	4.905215	1
4	1.580321	2.440562	1

	Cement	Slag	Fly ash	Water	SP	Coarse Aggr.	Fine Aggr.	SLUMP(cm)	FLOW(cm)	Compressive Strength (28-day)(Mpa)
0	273.0	82.0	105.0	210.0	9.0	904.0	680.0	23.0	62.0	34.99
1	163.0	149.0	191.0	180.0	12.0	843.0	746.0	0.0	20.0	41.14
2	162.0	148.0	191.0	179.0	16.0	840.0	743.0	1.0	20.0	41.81
3	162.0	148.0	190.0	179.0	19.0	838.0	741.0	3.0	21.5	42.08
4	154.0	112.0	144.0	220.0	10.0	923.0	658.0	20.0	64.0	26.82

1 - SVM as a classification task

2 - SVM as a regression task

Using Grid Search to find a better model