ML Lab Manual PDF

1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on
a given set of training data samples. Read the training data from a .csv file.
import csv
def find_s(examples, positive_label="Yes"):
    """Return the maximally specific hypothesis FIND-S derives from *examples*.

    Each example is a sequence of attribute values followed by the class
    label in the last position.  Only examples whose label equals
    *positive_label* influence the result; empty rows are ignored.

    The hypothesis has one entry per attribute: '0' means "no value seen
    yet", '?' means "any value", anything else is the single value seen in
    every positive example so far.

    Raises:
        ValueError: if a positive example has a different number of
            attributes than the first row.
    """
    rows = [row for row in examples if row]
    if not rows:
        return []

    # The width comes from the data instead of being fixed at six attributes.
    width = len(rows[0]) - 1
    hypothesis = ['0'] * width

    for row in rows:
        if row[-1] != positive_label:
            continue
        if len(row) - 1 != width:
            raise ValueError(
                "expected %d attributes, got %d: %r" % (width, len(row) - 1, row)
            )
        # Slice off the label by position, so an attribute whose value
        # happens to equal the positive label is still processed.
        for j, value in enumerate(row[:-1]):
            if hypothesis[j] == '0':
                hypothesis[j] = value
            elif hypothesis[j] != value:
                hypothesis[j] = '?'
    return hypothesis


if __name__ == "__main__":
    with open('1.csv', 'r', newline='') as f:
        training_data = list(csv.reader(f))
    for example in training_data:
        print(example)
    print("A Maximally Specific hypothesis is")
    # Wrapped in a list to keep the original nested-list output format.
    print([find_s(training_data)])
2. For a given set of training data examples stored in a .CSV file, implement and demonstrate the
Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with
the training examples.
import csv
def candidate_elimination(examples, positive_label='Yes', negative_label='No'):
    """Run the manual's simplified Candidate-Elimination pass and print each step.

    *examples* is a non-empty list of rows; each row holds the attribute
    values followed by the class label in the last position.

    The specific boundary S is a single hypothesis seeded from the first
    row.  The general boundary is a list of hypotheses, each constraining
    exactly one attribute.

    Returns:
        (S, G): the final specific hypothesis and the list of general
        hypotheses (empty when nothing more specific than all-'?' remains).

    Raises:
        ValueError: if *examples* is empty.
    """
    if not examples:
        raise ValueError("at least one training example is required")

    num_attributes = len(examples[0]) - 1
    print("\n The initial value of hypothesis: ")

    S = ['0'] * num_attributes
    most_general = ['?'] * num_attributes
    # Built from the actual width; identical to the old literal for six attributes.
    print("\n The most specific hypothesis S0 : [" + ",".join(S) + "]\n")
    print(" \n The most general hypothesis G0 : [" + ",".join(most_general) + "]\n")

    # Comparing with First Training Example
    # NOTE(review): this seeds S from the first row whatever its label is,
    # so the data is assumed to start with a positive example - confirm.
    S = list(examples[0][:num_attributes])

    # Comparing with Remaining Training Examples of Given Data Set
    print("\n Candidate Elimination algorithm Hypotheses Version Space Computation\n")

    general = []
    for number, example in enumerate(examples, start=1):
        label = example[num_attributes]

        if label == positive_label:
            # Generalise S on every attribute that disagrees with the example.
            for j in range(num_attributes):
                if example[j] != S[j]:
                    S[j] = '?'
            # Drop general hypotheses that no longer agree with S.  Building a
            # new list checks every entry and avoids deleting while indexing.
            general = [
                g for g in general
                if all(g[j] == '?' or g[j] == S[j] for j in range(num_attributes))
            ]
            print(" For Training Example No :{0} the hypothesis is S{0} ".format(number), S)
            if len(general) == 0:
                print(" For Training Example No :{0} the hypothesis is G{0} ".format(number), most_general)
            else:
                print(" For Training Example No :{0} the hypothesis is G{0}".format(number), general)

        elif label == negative_label:
            # Each attribute where S differs from the negative example gives
            # one specialisation that excludes it.
            for j in range(num_attributes):
                if S[j] != example[j] and S[j] != '?':
                    specialised = ['?'] * num_attributes
                    specialised[j] = S[j]
                    if specialised not in general:
                        general.append(specialised)
            print(" For Training Example No :{0} the hypothesis is S{0} ".format(number), S)
            print(" For Training Example No :{0} the hypothesis is G{0}".format(number), general)

    return S, general


if __name__ == "__main__":
    a = []
    print("\n The Given Training Data Set \n")
    with open('enjoysport.csv', 'r', newline='') as csvFile:
        for row in csv.reader(csvFile):
            if not row:
                continue  # ignore blank lines in the file
            a.append(row)
            print(row)
    candidate_elimination(a)
3. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an
appropriate data set for building the decision tree and apply this knowledge to classify a new
sample.
import pandas as pd
import numpy as np
# Load the training table from P3_Tennis.csv; the script later passes every
# column except the last one to ID3 as candidate features.
dataset= pd.read_csv('P3_Tennis.csv')
def entropy(target_col):
    """Return the base-2 Shannon entropy of the labels in *target_col*.

    *target_col* is any array-like of hashable labels (for example a pandas
    column).  A column with a single distinct label has entropy 0.
    """
    _, counts = np.unique(target_col, return_counts=True)
    # Compute the relative frequencies once instead of per term.
    probabilities = counts / counts.sum()
    return -np.sum(probabilities * np.log2(probabilities))
def InfoGain(data, split_attribute_name, target_name="PlayTennis"):
    """Return the information gain of splitting *data* on one attribute.

    Args:
        data: pandas DataFrame holding the attribute and target columns.
        split_attribute_name: column to split on.
        target_name: column holding the class label.

    Returns:
        Entropy of the target column minus the size-weighted entropy of the
        target within each distinct value of the split attribute.
    """
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    weights = counts / counts.sum()
    # Select each subset with a boolean mask.  The previous
    # where(...).dropna() also discarded rows that had a missing value in
    # any other column.
    weighted_entropy = sum(
        weight * entropy(data.loc[data[split_attribute_name] == value, target_name])
        for value, weight in zip(vals, weights)
    )
    return total_entropy - weighted_entropy
def ID3(data, originaldata, features, target_attribute_name="PlayTennis", parent_node_class=None):
    """Build an ID3 decision tree and return it as nested dicts.

    Args:
        data: DataFrame of the examples reaching this node.
        originaldata: the full training DataFrame, used for the fallback
            class when *data* is empty.
        features: sequence of column names still available for splitting.
        target_attribute_name: column holding the class label.
        parent_node_class: majority class of the parent node, returned when
            no features remain.

    Returns:
        A class label for a leaf, otherwise ``{feature: {value: subtree}}``.
    """
    # Check for an empty subset first: the "single class" test below indexes
    # element 0 and would raise on empty data.
    if len(data) == 0:
        labels, counts = np.unique(originaldata[target_attribute_name], return_counts=True)
        return labels[np.argmax(counts)]

    labels, counts = np.unique(data[target_attribute_name], return_counts=True)
    if len(labels) <= 1:
        return labels[0]
    if len(features) == 0:
        return parent_node_class

    # Majority class at this node, handed down to the children.
    parent_node_class = labels[np.argmax(counts)]

    # Information gain of every remaining feature; split on the best one.
    item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
    best_feature = features[np.argmax(item_values)]
    tree = {best_feature: {}}
    remaining = [f for f in features if f != best_feature]

    for value in np.unique(data[best_feature]):
        sub_data = data[data[best_feature] == value]
        # Pass originaldata through rather than reading a module-level name.
        tree[best_feature][value] = ID3(
            sub_data, originaldata, remaining, target_attribute_name, parent_node_class
        )
    return tree
# Build the tree using every column except the last as a candidate feature,
# then show the first rows of the data followed by the resulting tree.
tree = ID3(data=dataset, originaldata=dataset, features=dataset.columns[:-1])
print(dataset.head())
print(' \nDisplay Tree\n', tree)
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the
same using appropriate data sets.
import math
def sigmoid(x):
    """Return the logistic function 1 / (1 + e**-x) of the real number *x*."""
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For large negative x, exp(-x) overflows a float; this algebraically
    # equal form only ever evaluates exp of a negative number.
    e = math.exp(x)
    return e / (1 + e)
## Inputs and target for the XOR gate
x1=[0,0,1,1] #input1
x2=[0,1,0,1] #input2
t=[0,1,1,0] #target

## Initial weights and biases (fixed starting values, not random)
# Hidden layer first Perceptron
b1=-0.3
w11=0.21
w21= 0.15
# Hidden Layer Second Perceptron
b2=0.25
w12=-0.4
w22=0.1
# Output layer Perceptron
b3=-0.4
w13=-0.2
w23=0.3

error=0
iteration=0
train=True
print("weight are:")
print("w11 : %4.2f w12: %4.2f w21: %4.2f w22: %4.2f w13: %4.2f w23: %4.2f \n"
      %(w11,w12,w21,w22,w13,w23))

## Training Starts
# 2-2-1 network: w1k/w2k connect input 1/input 2 to hidden unit k (k = 1, 2);
# w13/w23 connect hidden unit 1/2 to the output unit.
while(train):
    for i in range(len(x1)):
        ## input for each perceptron of hidden layer
        z_in1=b1+x1[i]*w11+x2[i]*w21
        z_in2=b2+x1[i]*w12+x2[i]*w22
        ## computing activation function output
        z1=round(sigmoid(z_in1),4)
        z2=round(sigmoid(z_in2),4)
        # Output layer forward pass
        y_in=b3+z1*w13+z2*w23
        y=round(sigmoid(y_in),4)

        ## error term of the output unit
        del_k=round((t[i]-y)*y*(1-y),4)
        error=del_k

        ## error terms of the hidden units
        # Computed before w13/w23 change, so the error is propagated back
        # through the weights that produced this forward pass.
        del_1=del_k*w13*z1*(1-z1)
        del_2=del_k*w23*z2*(1-z2)

        ## Back pass
        # weight update for output layer
        w13=round(w13+del_k*z1,4)
        w23=round(w23+del_k*z2,4)
        b3=round(b3+del_k,4)

        ## update weights and biases of the hidden layer
        # Every input-to-hidden weight is updated; previously only w11 was.
        b1=round(b1+del_1,4)
        w11=round(w11+del_1*x1[i],4)
        w21=round(w21+del_1*x2[i],4)
        b2=round(b2+del_2,4)
        w12=round(w12+del_2*x1[i],4)
        w22=round(w22+del_2*x2[i],4)

    # Report once per pass over the four patterns; the error shown is the
    # output error term of the last pattern in the pass.
    print("Iteration: ",iteration)
    print("w11 : %5.4f w12: %5.4f w21: %5.4f w22: %5.4f w13: %5.4f w23: %5.4f "
          %(w11,w12,w21,w22,w13,w23))
    print("Error: %5.3f" %del_k)
    iteration=iteration+1
    if(iteration==1000):
        train=False
5. Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a
.csv file. Compute the accuracy of the classifier, considering a few test data sets.
import csv
import math
import random
import statistics
def calculate_probability(x, mean, stdev):
    """Return the Gaussian density of *x* for the given *mean* and *stdev*.

    When *stdev* is 0 the density is undefined.  That case is treated as a
    point mass: 1.0 if *x* equals *mean*, otherwise 0.0, instead of raising
    ZeroDivisionError.
    """
    if stdev == 0:
        return 1.0 if x == mean else 0.0
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
# Load the data: every row is numeric and its last column is the class value.
dataset = []
with open('lab5.csv', newline='') as csvfile:
    for row in csv.reader(csvfile):
        if row:  # ignore blank lines
            dataset.append([float(attr) for attr in row])
dataset_size = len(dataset)
print('Size of dataset is : ', dataset_size)

train_size = int(0.7 * dataset_size)  # 70 % of the rows are used for training
print(train_size)

# Split Data
# Choose the training rows by index; every other row becomes test data.
training_indexes = random.sample(range(dataset_size), train_size)
chosen = set(training_indexes)
X_train = [dataset[i] for i in training_indexes]
X_test = [row for i, row in enumerate(dataset) if i not in chosen]

# Separate Data based on class value
classes = {}
for samples in X_train:
    classes.setdefault(int(samples[-1]), []).append(samples)

# Find the mean and standard deviation of each attribute, per class
summaries = {}
for classValue, training_data in classes.items():
    # zip(*rows) yields one tuple per column; the last column is the class
    # value itself and is left out.
    attribute_columns = list(zip(*training_data))[:-1]
    summaries[classValue] = [
        (statistics.mean(attribute), statistics.stdev(attribute))
        for attribute in attribute_columns
    ]

# Predict the output of test data
X_prediction = []
for i in X_test:
    probabilities = {}
    for classValue, classSummary in summaries.items():
        probabilities[classValue] = 1
        for index, attr in enumerate(classSummary):
            probabilities[classValue] *= calculate_probability(i[index], attr[0], attr[1])
    # Pick the class with the highest likelihood (first one wins a tie);
    # None when there are no class summaries.
    best_label = max(probabilities, key=probabilities.get, default=None)
    X_prediction.append(best_label)

# Find Accuracy
correct = 0
for index, key in enumerate(X_test):
    if key[-1] == X_prediction[index]:
        correct += 1
if X_test:
    print("Accuracy : ", correct / (float(len(X_test))) * 100)
else:
    # An empty test set previously ended in ZeroDivisionError.
    print("Accuracy : no test samples to evaluate")
6. Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to
perform this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy,
precision, and recall for your data set.
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import precision_score,accuracy_score,recall_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder

# Read the file as two columns, 'text' and 'tag' (the file has no header row).
dataset = pd.read_csv('naivetext1.txt',names =['text','tag'])

#encoding
# Replace each tag with an integer code.
encoder = LabelEncoder()
dataset['tag']= encoder.fit_transform(dataset['tag'])

#splitting
X_train, X_test, y_train, y_test = train_test_split(dataset['text'], dataset['tag'],
                                                    test_size=0.2)

#vectorization
# The vocabulary is learned from the training texts only; the test texts are
# mapped onto that same vocabulary.
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(X_train)
xtest_dtm=count_vect.transform(X_test)
# get_feature_names() was removed from scikit-learn; get_feature_names_out()
# is its replacement.
dataset=pd.DataFrame(xtrain_dtm.toarray(),columns=count_vect.get_feature_names_out())

#prediction
clf = MultinomialNB().fit(xtrain_dtm,y_train)
y_pred= clf.predict(xtest_dtm)

#output
# NOTE(review): precision_score/recall_score are called with their default
# averaging, which presumably requires exactly two tags - confirm.
print('Precision',precision_score(y_test, y_pred))
print('Accuracy',accuracy_score(y_test, y_pred))
print('Recall',recall_score(y_test, y_pred))
7. Write a program to construct a Bayesian network considering medical data. Use this model to
demonstrate the diagnosis of heart patients using standard Heart Disease Data Set. You can use
Java/Python ML library classes/API.
import pandas as pd
from sklearn.metrics import confusion_matrix
# train_test_split is used below but was never imported in this program.
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

col =['Age','Gender','FamilyHist','Diet','LifeStyle','Cholesterol','HeartDisease']
data = pd.read_csv('heart_disease_data.csv',names =col )
print(data)

#encoding
# Replace the values of every column with integer codes, one column at a time.
encoder = LabelEncoder()
for column in col:
    data[column] = encoder.fit_transform(data[column])

#spliting data
# The first six columns are the features; the last column is the target.
X = data.iloc[:,0:6]
y = data.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

#prediction
# NOTE(review): this fits a Gaussian naive Bayes classifier, although the
# task statement asks for a Bayesian network - confirm this is intended.
clf = GaussianNB()
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

#confusion mtx output
print(confusion_matrix(y_test, y_pred))
8. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering
using k-Means algorithm. Compare the results of these two algorithms and comment on the quality
of clustering. You can add Java/Python ML library classes/API in the program.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
# Importing the dataset
data = pd.read_csv('xclara.csv')

# Getting the values and plotting it
f1 = data['V1'].values
f2 = data['V2'].values
X = np.column_stack((f1, f2))  # one row per sample: (V1, V2)

# k-Means with three clusters
kmeans = KMeans(3, random_state=0)
labels = kmeans.fit(X).predict(X)
centroids = kmeans.cluster_centers_
print('Graph using Kmeans Algorithm')
plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis')
plt.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=200, c='#050505')
plt.show()

#gmm
# EM is seeded like k-Means above so that the comparison between the two
# clusterings is repeatable from run to run.
gmm = GaussianMixture(n_components=3, random_state=0).fit(X)
labels = gmm.predict(X)

# plot
# Marker size grows with the probability of the assigned component.
probs = gmm.predict_proba(X)
size = 10 * probs.max(1) ** 3
print('Graph using EM Algorithm')
plt.scatter(X[:, 0], X[:, 1], c=labels, s=size, cmap='viridis')
plt.show()
9. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print both
correct and wrong predictions. Java/Python ML library classes can be used for this problem.

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets
# train_test_split is used below but was never imported in this program.
from sklearn.model_selection import train_test_split

iris=datasets.load_iris()
iris_data=iris.data
iris_labels=iris.target
print(iris_labels)

# Hold out 20 % of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(iris_data, iris_labels, test_size=0.20)

classifier=KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train,y_train)
y_pred=classifier.predict(x_test)

# The task asks for both correct and wrong predictions, so list every test
# sample with its actual and predicted class.
for actual, predicted in zip(y_test, y_pred):
    outcome = 'Correct' if actual == predicted else 'Wrong'
    print('%-8s actual=%s predicted=%s'
          % (outcome, iris.target_names[actual], iris.target_names[predicted]))

print('confusion matrix')
print(confusion_matrix(y_test,y_pred))
print('Accuracy metrics')
print(classification_report(y_test,y_pred))
10. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points.
Select appropriate data set for your experiment and draw graphs.
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np1
def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix of *point* against *xmat*.

    Args:
        point: 1 x n numpy matrix, the query row.
        xmat: m x n numpy matrix of training rows.
        k: bandwidth; larger values weight distant rows more heavily.

    Returns:
        m x m numpy matrix whose j-th diagonal entry is
        exp(-||point - xmat[j]||^2 / (2 * k^2)); all other entries are 0.
    """
    m = np1.shape(xmat)[0]
    # np1.mat was removed in NumPy 2; asmatrix is the supported spelling.
    weights = np1.asmatrix(np1.eye(m))
    for j in range(m):
        # Use the xmat argument rather than a module-level X.
        diff = point - xmat[j]
        # diff * diff.T is a 1 x 1 matrix; take its single element explicitly.
        squared_distance = (diff * diff.T)[0, 0]
        weights[j, j] = np1.exp(squared_distance / (-2.0 * k ** 2))
    return weights
def localWeight(point, xmat, ymat, k):
    """Return the locally weighted least-squares coefficients at *point*.

    Solves (X^T W X)^-1 (X^T W y), where W is the kernel weight matrix of
    *point* against the rows of *xmat*.

    Args:
        point: 1 x n numpy matrix, the query row.
        xmat: m x n numpy matrix of training rows.
        ymat: 1 x m numpy matrix of targets.
        k: kernel bandwidth.

    Returns:
        n x 1 numpy matrix of coefficients.
    """
    wei = kernel(point, xmat, k)
    # Use the xmat argument rather than a module-level X.
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W
def localWeightRegression(xmat, ymat, k):
    """Return the locally weighted regression prediction for every row of *xmat*.

    Args:
        xmat: m x n numpy matrix of training rows.
        ymat: 1 x m numpy matrix of targets.
        k: kernel bandwidth.

    Returns:
        1-D numpy array of length m; entry i is the fit evaluated at xmat[i].
    """
    m = np1.shape(xmat)[0]
    ypred = np1.zeros(m)
    for i in range(m):
        # The product is a 1 x 1 matrix; store its single element as a scalar.
        ypred[i] = (xmat[i] * localWeight(xmat[i], xmat, ymat, k))[0, 0]
    return ypred
# load data points
data = pd.read_csv('data10.csv')
bill = np1.array(data.total_bill)
tip = np1.array(data.tip)

# preparing and add 1 in bill
# np1.mat was removed in NumPy 2; asmatrix is the supported spelling.
mbill = np1.asmatrix(bill)
mtip = np1.asmatrix(tip)
m= np1.shape(mbill)[1]
one = np1.asmatrix(np1.ones(m))
# Design matrix with one row per sample: [1, total_bill].
X= np1.hstack((one.T,mbill.T))

#set k here
ypred = localWeightRegression(X,mtip,2)

# Sort by total bill so the fitted curve is drawn from left to right.
SortIndex = bill.argsort()
xsort = bill[SortIndex]

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(bill,tip, color='green')
ax.plot(xsort,ypred[SortIndex], color = 'red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()

ML Lab Manual PDF

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

ML Lab Manual PDF

Uploaded by

Copyright:

Available Formats

1.

with open('enjoysport.csv', 'r') as csvFile:

print("\n The initial value of hypothesis: ")

# Comparing with First Training Example

# Comparing with Remaining Training Examples of Given Data Set

print("\n Candidate Elimination algorithm Hypotheses Version Space Computation\n")

print(" For Training Example No :{0} the hypothesis is S{0} ".format(i+1),S)

print(" For Training Example No :{0} the hypothesis is S{0} ".format(i+1),S)

InfoGain = total_entropy - Weighted_Entropy

elif len(features) ==0:

item_values = [InfoGain(data,feature,target_attribute_name) for feature in

## Initialize random weights and biases

# Hidden layer first Perceptron

##input for each perceptron of hidden layer

# Output layer forward pass

##error computation for hidden layer

## update weight and biases

def calculate_probability(x, mean, stdev):

train_size = int(0.7 * dataset_size) # 70 % as test data

# Separate Data based on class value

# Find mean and variance of each attribute by adding all attributes

# Predict the output of test data

best_label, best_prob = None, -1

print("Accuracy : ", correct / (float(len(X_test))) * 100)

#confusion mtx output

# Getting the values and plotting it

kmeans = KMeans(3, random_state=0)

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

def kernel(point,xmat, k):

# load data points

#preparing and add 1 in bill

You might also like