Professional Documents
Culture Documents
B24 ML Exp-1
B24 ML Exp-1
Experiment No. 1
A.1 Aim:
A.3 Outcome:
Students will be able to implement Logistic Regression.
A.4 Theory:
(Students must submit the soft copy, organised as per the following segments, within two hours of the
practical. The soft copy must be uploaded to Blackboard or, if Blackboard access is not available,
emailed to the concerned lab in-charge faculty at the end of the practical.)
import csv
import pandas as pd
# Data inspection, label encoding, box plot and baseline predictions.
# NOTE(review): `df`, `preprocessing`, `plt`, `log_reg`, `logreg_model` and the
# X_train/X_test/y_train splits are not defined in the visible snippet --
# presumably the loading, import, split and model-construction lines were
# lost when the document was extracted; confirm against the original notebook.
print(df)
# The bare expressions below only show output in an interactive/notebook
# session; run as a plain script they are evaluated and discarded.
df.shape
df.isnull().sum()
df.head()
# Encode the two categorical columns in place as integer labels.
label_encoder = preprocessing.LabelEncoder()
df['GENDER']= label_encoder.fit_transform(df['GENDER'])
df['GENDER'].unique()
# The same encoder object is re-fitted on the target column here.
df['LUNG_CANCER']= label_encoder.fit_transform(df['LUNG_CANCER'])
df['LUNG_CANCER'].unique()
df.head()
# Box plot of the DataFrame columns on a 14x8 figure.
plt.figure(figsize=(14, 8))
ax = plt.gca()  # `ax` is not used again in the visible code
df.boxplot()
# Fit `log_reg` on the training split and predict on the test split.
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
# NOTE(review): a second estimator name, `logreg_model`, is used from here on;
# it is not fitted anywhere in the visible code -- verify it is the intended
# (fitted) model. `y_pred` from the line above is overwritten below.
y_pre=logreg_model.predict(X_train)
y_pred=logreg_model.predict(X_test)
# Class probabilities on the test split; column 1 is used later for ROC AUC.
y_pred1=logreg_model.predict_proba(X_test)
# Hyper-parameter search for the logistic-regression model, followed by an
# evaluation of the best estimator on the held-out test split.
# NOTE(review): `c_values`, `logreg_model`, `log_reg`, `X`, `y`, the
# train/test splits and the sklearn names used here (GridSearchCV,
# confusion_matrix, metrics, accuracy_score, roc_auc_score, cross_val_score)
# are not defined in this fragment -- presumably set up earlier; confirm.

# Specify parameters
# Two grids are needed because the l1 penalty is only paired with the
# liblinear solver, while l2 is tried with three solvers.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases --
# check the installed version before relying on it.
param_grid = [
    {
        'C': c_values,
        'penalty': ['l1'],
        'solver': ['liblinear'],
        'multi_class': ['ovr'],
    },
    {
        'C': c_values,
        'penalty': ['l2'],
        'solver': ['liblinear', 'newton-cg', 'lbfgs'],
        'multi_class': ['ovr'],
    },
]

# 10-fold cross-validated grid search, selecting on accuracy.
grid = GridSearchCV(logreg_model, param_grid, cv=10, scoring='accuracy')
grid.fit(X_train, y_train)
print(grid.best_params_)
print(grid.best_score_)
print(grid.best_estimator_)

# Hard predictions and class probabilities from the tuned model.
predictionforest = grid.best_estimator_.predict(X_test)
y_pred2 = grid.best_estimator_.predict_proba(X_test)

print(confusion_matrix(y_test, predictionforest))
# print("{0}".format(metrics.confusion_matrix(y_test, y_pred,
#                                             labels=[1, 0])))
print("Classification Report")
# labels for set 1=True to upper left and 0 = False to lower right
print("{0}".format(metrics.classification_report(
    y_test, predictionforest, labels=[1, 0])))

acc_hyper = accuracy_score(y_test, predictionforest)
print(acc_hyper)

# ROC AUC uses the probability of the positive class (column 1).
print("roc_auc_score after hypertuning: {}".format(
    roc_auc_score(y_test, y_pred2[:, 1])))
print("roc_auc_score original: {}".format(
    roc_auc_score(y_test, y_pred1[:, 1])))

# NOTE(review): this cross-validates `log_reg` on the full X, y rather than
# the tuned `grid.best_estimator_` -- confirm that is the intended baseline.
print("CV_score: {}".format(
    cross_val_score(log_reg, X, y, cv=10, scoring='accuracy').mean()))
B.2 Input and Output: