
# Importing Required Libraries

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

from sklearn import metrics

import seaborn as sn

import matplotlib.pyplot as plt

from sklearn.datasets import load_iris

from sklearn.neighbors import KNeighborsClassifier

# Importing (Reading) the Dataset

col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']

data = pd.read_csv('/diabets (2).csv', header=None, names=col_names)

# Displaying the shape and first few rows of the dataset

print(data.shape)

print(data.head())

# Checking for null values

print(data.isnull().sum())
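
# Optional sanity check (not in the original script): summary statistics make it
# easier to spot placeholder values, e.g. zeros where a measurement may be missing.
print(data.describe())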

# Assigning dependent and independent variables

feature_cols = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']

x = data[feature_cols]

y = data['label']

# Splitting the dataset into Training and Testing sets

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=5)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
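
# Optional variant (illustrative, not part of the original): passing stratify=y keeps
# the class balance of 'label' roughly the same in the training and testing sets.
xs_train, xs_test, ys_train, ys_test = train_test_split(
    x, y, test_size=0.2, random_state=5, stratify=y)
print(ys_train.value_counts(normalize=True))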

# Fitting the Model (Logistic Regression)


model = LogisticRegression(solver='lbfgs', max_iter=1000)

model.fit(x_train, y_train)

y_pred = model.predict(x_test)

# Printing test values and predictions

print("Test values are\n", x_test)

print('Predicted labels: ', y_pred)

print('Actual labels:\n', y_test)

# Evaluation Metrics

conf_mat = metrics.confusion_matrix(y_test, y_pred)

print('Confusion Matrix:\n', conf_mat)

accuracy = metrics.accuracy_score(y_test, y_pred)

print('Accuracy Score: ', accuracy)

print('Accuracy in Percentage: ', int(accuracy * 100), '%')
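
# Optional (an addition for illustration, using the same metrics module): the
# classification report breaks accuracy down into per-class precision, recall and F1.
print(metrics.classification_report(y_test, y_pred))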

# Plotting the Confusion Matrix

conf_mat_df = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])

sn.heatmap(conf_mat_df, annot=True, fmt='d')

plt.show()

# Loading Iris Dataset

iris = load_iris()

X = iris.data

y = iris.target

feature_names = iris.feature_names

target_names = iris.target_names

# Displaying feature names

print("Feature names:", feature_names)


# Splitting the Iris Dataset into Training and Testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

print(X_train.shape, X_test.shape)

print(y_train.shape, y_test.shape)

# Fitting the Model (K-Nearest Neighbors)

classifier_knn = KNeighborsClassifier(n_neighbors=3)

classifier_knn.fit(X_train, y_train)

y_pred = classifier_knn.predict(X_test)

# Finding accuracy

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Providing sample data for prediction

sample = [[5, 5, 3, 2], [2, 4, 3, 5]]

preds = classifier_knn.predict(sample)

pred_species = [iris.target_names[p] for p in preds]

print("Predictions:", pred_species)

# Splitting the Iris Dataset into Training and Testing sets with a different test size

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

# Fitting the Model (K-Nearest Neighbors) with the new split

classifier_knn.fit(X_train, y_train)

y_pred = classifier_knn.predict(X_test)

# Finding accuracy with the new split

print("Accuracy with 40% test size:", metrics.accuracy_score(y_test, y_pred))

# Predicting the same sample data with the refit model

preds = classifier_knn.predict(sample)
pred_species = [iris.target_names[p] for p in preds]

print("Predictions with 40% test size:", pred_species)
