ML Lab

PROGRAMS :
1. The probability that it is Friday and that a student is absent is 3 %. Since there are 5
school days in a week, the probability that it is Friday is 20 %. What is the probability that
a student is absent given that today is Friday? Apply Bayes' rule in Python to get the result.
# Given:
#   P(Friday and absent) = 3 %
#   P(Friday)            = 20 %
prob_fri_and_abs = 3/100
prob_fri = 20/100
# Conditional probability: P(abs | fri) = P(fri & abs) / P(fri)
prob_abs_on_fri = prob_fri_and_abs/prob_fri
# prob_abs_on_fri is a fraction (0.15). Scale it by 100 so the number agrees
# with the '%' unit printed after it; ':g' drops floating-point noise.
message = f'Probability that a student is absent on friday is {prob_abs_on_fri * 100:g} %'
print(message)
Output :
Probability that a student is absent on Friday is 15%

2. Implement k-nearest neighbours classification using python
import pandas as pd
# k-nearest-neighbours vote over the rows of knn.csv, using the columns
# 'Attendance', 'Marks' and 'Result' read below.
df = pd.read_csv("knn.csv")
attendance = int(input("Enter attendance "))
marks = int(input("Enter Marks "))

# (squared Euclidean distance, row label) for every row, nearest first.
# The row count comes from the data instead of a hard-coded 30, and the
# result is no longer stored under the builtin name `tuple`.
neighbours = sorted(
    ((df['Attendance'][i] - attendance) ** 2 + (df['Marks'][i] - marks) ** 2, i)
    for i in range(len(df))
)

k = int(input('Enter k '))
Pass = 0
Fail = 0
# Slicing tolerates k larger than the data set (all rows vote) and a
# non-positive k (nobody votes), where indexing would raise IndexError.
for _, row in neighbours[:max(k, 0)]:
    if df['Result'][row] == 'Pass':
        Pass += 1
    else:
        Fail += 1

# A tie is reported as 'Fail', as before.
result = 'Pass' if Pass > Fail else 'Fail'
print(f'{attendance} {marks} {result}')
Input :
Enter attendance 90
Enter marks 25
Enter k 5
Output :
90 25 Pass
3. Implement k-means clustering using python
import pandas as pd
import matplotlib.pyplot as plt
# Single-pass clustering of the rows of "k means.csv" around five
# user-chosen seed rows, using the 'Attendance' and 'Marks' columns.
# NOTE(review): the loop headers were missing from the extracted source
# (the loop variable `i` was used without ever being bound); they are
# reconstructed here from the surrounding statements - confirm against the
# original lab sheet.
df = pd.read_csv("k means.csv")
n_points = len(df)
n_clusters = 5

# is_seed[r] == 1 marks 0-based row r as an initial centroid
# (previously stored under the builtin name `list`).
is_seed = [0] * n_points
initial = [0] * n_clusters
for i in range(n_clusters):
    ele = int(input("Enter the centriod "))
    ele -= 1  # the user enters 1-based row numbers
    is_seed[ele] = 1
    initial[i] = ele

# c[j] lists the 1-based row numbers in cluster j, seed row first.
c = [[initial[i] + 1] for i in range(n_clusters)]

for i in range(n_points):
    if is_seed[i] == 0:
        # Start from +inf rather than 100000 so that far-away points are
        # still assigned to their genuinely nearest cluster.
        mini = float('inf')
        elem = 0
        a = df['Attendance'][i]
        b = df['Marks'][i]
        for j in range(n_clusters):
            # Centroid of cluster j = mean of the members assigned so far.
            meana = 0
            meanm = 0
            for x in c[j]:
                meana += df['Attendance'][x-1]
                meanm += df['Marks'][x-1]
            meana = meana/len(c[j])
            meanm = meanm/len(c[j])
            dist = ((meana - a)**2) + ((meanm - b)**2)
            if dist < mini:
                mini = dist
                elem = j
        c[elem].append(i+1)

# The documented sample output is the cluster list, so print it explicitly.
print(c)
df.plot(kind = 'scatter',x = 'Attendance',y='Marks')
Input :
Enter the centroids : 2 6 13 16 26
Output :
[[2, 11, 20, 21],
[6, 1, 4, 8, 15, 17, 19, 24, 28],
[13, 5, 10, 14, 25, 26, 27],
[16, 3, 7, 12, 18, 29],
[23, 9, 22, 30]]
4. Implement linear regression using python.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Simple least-squares fit of median_house_value against median_income.
df = pd.read_csv('/content/sample_data/california_housing_test.csv')
x = df['median_income']
y = df['median_house_value']
meanx = np.mean(x)
meany = np.mean(y)
length = len(x)

# slope m = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2)
n = 0
d = 0
for xi, yi in zip(x, y):
    dx = xi - meanx
    n += dx*(yi - meany)
    d += dx**2
m = n/d
# intercept chosen so the line passes through (mean_x, mean_y)
c = meany - (m * meanx)

# Draw the fitted line across the observed income range, over the raw points.
xmin = np.min(x)
xmax = np.max(x)
line_x = np.linspace(xmin, xmax)
line_y = m*line_x + c
plt.plot(line_x, line_y, color = 'blue', linewidth = 4)
plt.scatter(x, y, 8, color = 'red')

# Predict the house value for a user-supplied income.
X = float(input('Enter median income : '))
Y = round((m*X + c), 2)
print(f'The estimated median house value is : {Y}')
Output :
Enter median income : 8.44
The estimated median house value is : 395938.23

5. Implement Naïve Bayes Classification
import pandas as pd
df = pd.read_csv('golf_df.csv')
unique_result = df.Play.unique()
# tot: class label -> number of rows with that label
# res: class label -> fraction of all rows with that label
tot = {result: int((df.Play == result).sum()) for result in unique_result}
res = {result: tot[result]/len(df) for result in unique_result}
print(res)
def findprob(attribute, unique_att, dic):
    """Fill *dic* with the per-class frequency of each value of *attribute*.

    For every value in *unique_att* and every class in ``unique_result``,
    store under the key ``'<value>-<class>'`` the number of rows having both,
    divided by ``tot[class]`` and rounded to 3 decimals. The filled
    dictionary is printed once at the end.
    """
    for att in unique_att:
        has_value = df[attribute] == att
        for result in unique_result:
            matches = int((has_value & (df.Play == result)).sum())
            dic[str(att)+'-'+result] = round(matches/tot[result], 3)
    print(dic)
# Distinct values of each attribute column.
unique_outlook = df.Outlook.unique()
unique_temp = df.Temperature.unique()
unique_hum = df.Humidity.unique()
unique_windy = df.Windy.unique()

# One '<value>-<class>' frequency table per attribute, filled (and printed)
# by findprob in the order Outlook, Temperature, Humidity, Windy.
outlook, temp, hum, windy = {}, {}, {}, {}
for column, values, table in (
    ('Outlook', unique_outlook, outlook),
    ('Temperature', unique_temp, temp),
    ('Humidity', unique_hum, hum),
    ('Windy', unique_windy, windy),
):
    findprob(column, values, table)
def calculate(o, t, h, w):
    """Return the class with the highest naive Bayes score for one entry.

    *o*, *t*, *h* and *w* are the outlook, temperature, humidity and windy
    values, spelled exactly as in the keys of the ``outlook``, ``temp``,
    ``hum`` and ``windy`` tables. The score of each class is stored in the
    module-level dict ``P``.

    Raises KeyError when a value has no entry in its table.
    """
    for result in unique_result:
        # Naive Bayes: score = P(class) * product of P(value | class).
        # The class prior res[result] was previously left out, which biases
        # the decision whenever the classes are not equally frequent.
        P[result] = (
            res[result]
            * outlook[o+'-'+result]
            * temp[t+'-'+result]
            * hum[h+'-'+result]
            * windy[w+'-'+result]
        )
    # Highest score wins; on a tie the larger label wins.
    final = max(zip(P.values(), P.keys()))[1]
    return final
n = int(input('Enter number of new entries '))

# Scores of the most recent entry, written by calculate().
P = {}
for _ in range(n):
    answers = [
        input(f'Enter {label} ')
        for label in ('Outlook', 'Temperature', 'Humidity', 'Windy')
    ]
    # The first three tables use lower-case keys; Windy keys are 'True'/'False'.
    o, t, h = (text.lower() for text in answers[:3])
    w = answers[3].title()
    final = calculate(o, t, h, w)
    print(f'The result if the person will play is {final}')
Output :
{'no': 0.4, 'yes': 0.6}
{'sunny-no': 0.667, 'sunny-yes': 0.222, 'overcast-no': 0.0, 'overcast-yes': 0.444, 'rainy-no': 0.333, 'rainy-yes': 0.333}
{'hot-no': 0.333, 'hot-yes': 0.222, 'mild-no': 0.5, 'mild-yes': 0.444, 'cool-no': 0.167, 'cool-yes': 0.333}
{'high-no': 0.667, 'high-yes': 0.333, 'normal-no': 0.333, 'normal-yes': 0.667}
{'False-no': 0.333, 'False-yes': 0.667, 'True-no': 0.667, 'True-yes': 0.333}
Enter number of new entries 1
Enter Outlook sunny
Enter Temperature mild
Enter Humidity normal
Enter Windy true
The result if the person will play is no

6. Implement an algorithm to demonstrate the significance of genetic algorithm
import random
#input: twelve parent bit strings
parent = [input(f'Enter 8-bit parent {i+1} ') for i in range(12)]

# Randomly pair the parents off without replacement: six pairs of two.
available = list(range(12))
parent_set = []
for _ in range(6):
    p = []
    for _ in range(2):
        r = random.randint(0, len(available)-1)
        # pop() returns the chosen parent index and removes it from the pool
        p.append(parent[available.pop(r)])
    parent_set.append(p)
#crossover
single = int(input('Enter single crossover point '))
double1 = int(input('Enter double crossover point 1 '))
double2 = int(input('Enter double crossover point 2 '))
# The two-point crossover slices a[:double1] + b[double1:double2] + a[double2:].
# With double1 > double2 the middle slice is empty and the outer slices
# overlap, so children would come out longer than their parents. Order the
# points so the slices always partition the string.
if double1 > double2:
    double1, double2 = double2, double1
# Children produced by cross(), four per parent pair.
crossover = []
def cross(a, b):
    """Append the four crossover children of parents *a* and *b* to ``crossover``.

    Two children come from swapping tails at ``single``; two come from
    swapping the middle section between ``double1`` and ``double2``.
    """
    # single-point crossover
    crossover.append(a[:single] + b[single:])
    crossover.append(b[:single] + a[single:])
    # two-point crossover
    crossover.append(a[:double1] + b[double1:double2] + a[double2:])
    crossover.append(b[:double1] + a[double1:double2] + b[double2:])
# Produce four children for every parent pair.
for a, b in parent_set:
    cross(a, b)

#mutation: flip one random bit of one random child.
# Bounds come from the data instead of the hard-coded 23 and 7, so the step
# still works if the number of pairs or the bit length changes.
x = random.randint(0, len(crossover) - 1)
mutate = crossover[x]
y = random.randint(0, len(mutate) - 1)
flipped = '1' if mutate[y] == '0' else '0'
crossover[x] = mutate[:y] + flipped + mutate[y+1:]

#print output
for i in range(len(parent_set)):
    i2 = i*4  # each pair owns four consecutive children
    # The f-string was split across lines in the source (a syntax error);
    # it is rejoined here on one line.
    print(f'The crossover children of pair {i+1} : {crossover[i2]} {crossover[i2+1]} {crossover[i2+2]} {crossover[i2+3]}')
print(f'The child {x} was mutated at position {y} from {mutate} to {crossover[x]}')
Input :
Enter 8-bit parent 1 11001001

Enter single crossover point 5
Enter double crossover point 1 1
Enter double crossover point 2 6
Output :
The crossover children of parent 1 : 01011100 01010011 11011011 01010100
The child 20 was mutated at position 6 from 00010011 to 00010001

7. Implement Back-propagation algorithm
import numpy as np
# Training inputs (one row per sample) and their target outputs.
x = np.array([
    [0.2, 0.8, 0.3],
    [0.8, 0.2, 0.3],
    [0.3, 0.9, 0.4],
    [0.7, 0.4, 0.6],
])
y = np.array([
    [0.3, 0.8],
    [0.8, 0.3],
    [0.4, 0.9],
    [0.6, 0.5],
])
learning_rate = 0.2

# Layer sizes are taken from the data; only the hidden width is chosen by hand.
n, output_size = y.shape
input_size = x.shape[1]
hidden_size = 3

# Weights start as zero-mean Gaussian noise with standard deviation 0.5.
w1 = np.random.normal(0.0, 0.5, (input_size, hidden_size))
w2 = np.random.normal(0.0, 0.5, (hidden_size, output_size))
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
    return 1.0 / (1.0 + np.exp(-x))
def mean_sq_error(pred, true):
    """Return the sum of squared differences divided by twice the element count of *pred*."""
    squared_diff = np.square(pred - true)
    return squared_diff.sum() / (2 * pred.size)
def back_propogation(num):
    """Train the two-layer network for *num* full-batch iterations.

    Each iteration runs a forward pass of ``x`` through ``w1`` and ``w2``
    with sigmoid activations, back-propagates the output error and updates
    the module-level weights ``w1`` and ``w2`` in place. The mean square
    error of the last forward pass is printed once at the end (``None`` when
    *num* is 0, which used to raise NameError).
    """
    global w1, w2
    mse = None
    for _ in range(num):
        #forward phase
        hidden_in = np.dot(x, w1)
        hidden_out = sigmoid(hidden_in)
        output_in = np.dot(hidden_out, w2)
        output_out = sigmoid(output_in)
        mse = mean_sq_error(output_out, y)
        #back propogation phase
        output_error = output_out - y
        # delta = error * sigmoid'(z), where sigmoid'(z) = out * (1 - out)
        output_delta = output_error * output_out * (1-output_out)
        hidden_error = np.dot(output_delta, w2.T)
        hidden_delta = hidden_error * hidden_out * (1-hidden_out)
        #update weights
        # The gradients are built from the deltas. The previous code used the
        # raw errors here and left both deltas unused, so the sigmoid
        # derivative was dropped from the update.
        w1 -= learning_rate * (np.dot(x.T, hidden_delta)/n)
        w2 -= learning_rate * (np.dot(hidden_out.T, output_delta)/n)
    print(f'Mean square error : {mse}')
num = int(input('Enter number of iterations '))
back_propogation(num)

# Forward pass of one sample through the trained network.
a = [0.2, 0.8, 0.3]
hidden_in = np.dot(a, w1)
hidden_out = sigmoid(hidden_in)
output_in = np.dot(hidden_out, w2)
output_out = sigmoid(output_in)
# A bare expression only displays in a notebook cell; print so the
# prediction is also shown when this runs as a script.
print(output_out)
Output :
Enter number of iterations 1000
Mean square error : 0.0018373157925288387

[0.330978 0.8504427]
8. Implement Decision Tree (ID3) using python
import pandas as pd
import numpy as np
from numpy import log2 as log
# Machine epsilon, used to keep divisions and logarithms away from zero.
eps = np.finfo(float).eps
df = pd.read_csv('golf_df.csv')

# The last column is the class label; every other column is an attribute.
*attribute_keys, result = df.keys()
# Attribute values are compared as lower-case strings.
for key in attribute_keys:
    df[key] = df[key].astype(str).str.lower()
unique_result = df[result].unique()
def find_entropy(df):
    """Return the entropy, in bits, of the class column (last column) of *df*.

    Only the classes that actually occur in *df* contribute. Looking up every
    globally known class in ``value_counts()`` raised KeyError as soon as a
    subset lacked one of them. An empty frame has entropy 0.
    """
    target = df.keys()[-1]
    total = len(df)
    entropy = 0
    for count in df[target].value_counts():
        fraction = count/total
        # A zero count (possible for categorical columns) contributes nothing.
        if fraction > 0:
            entropy += -fraction*np.log2(fraction)
    return entropy
# Weighted entropy contribution of each attribute value seen so far
# (values are stored with a negative sign, as computed below).
att_ent = {}
def find_entropy_attribute(df,att):
    """Return the weighted average class entropy of *df* after splitting on *att*.

    For every distinct value of *att* the class entropy of the matching rows
    is computed and weighted by the share of rows holding that value. As a
    side effect the (negated) weighted term of each value is stored in the
    module-level dict ``att_ent``.
    """
    ent2 = 0
    for var in df[att].unique():
        # Combine the conditions into one aligned mask instead of chaining
        # two boolean lookups, which relied on pandas silently re-aligning
        # the second mask to the already-filtered Series.
        has_var = df[att] == var
        total = int(has_var.sum())  # invariant over the class loop
        ent = 0
        for res in unique_result:
            num = int((has_var & (df[result] == res)).sum())
            # eps keeps the division and the logarithm defined at zero
            frac = num/(total+eps)
            ent += -frac*log(frac+eps)
        frac2 = total/len(df)
        ent2 += -frac2*ent
        att_ent[var] = -frac2*ent
    # ent2 accumulates with a negative sign; report the magnitude.
    return abs(ent2)
def find_winner(df):
    """Return the attribute column of *df* with the highest information gain.

    Every column except the last (the class label) is a candidate. On a tie
    the first such column wins, as ``np.argmax`` returns the first maximum.
    """
    attributes = df.keys()[:-1]
    # The entropy of the whole table is the same for every attribute, so it
    # is computed once instead of once per column.
    base_entropy = find_entropy(df)
    IG = [base_entropy - find_entropy_attribute(df, key) for key in attributes]
    return attributes[np.argmax(IG)]
def sub_table(df, node, value):
    """Return the rows of *df* whose column *node* equals *value*, re-indexed from 0."""
    matching_rows = df.loc[df[node] == value]
    return matching_rows.reset_index(drop=True)
def buildtree(df, tree=None):
    """Recursively build an ID3 decision tree for *df* as nested dicts.

    The result has the shape ``{attribute: {value: subtree_or_class}}``: the
    attribute chosen by ``find_winner`` is the key, and each of its values
    maps either to a class label (when all matching rows share one class) or
    to the tree built from the matching rows. When *tree* is given, the new
    node is added to it.
    """
    result = df.keys()[-1]
    winner = find_winner(df)
    if tree is None:
        tree = {}
    branches = tree[winner] = {}
    for value in np.unique(df[winner]):
        subtable = sub_table(df, winner, value)
        classes = np.unique(subtable[result])
        if len(classes) == 1:
            # pure subset: store the class label as a leaf
            branches[value] = classes[0]
        else:
            branches[value] = buildtree(subtable)
    return tree
ID3 = buildtree(df)
print(ID3)
def predict(inputs, tree):
    """Return the class that the decision *tree* assigns to *inputs*.

    *tree* is a nested dict of the form ``{attribute: {value: subtree_or_class}}``
    and *inputs* maps attribute names to values. The tree is walked from the
    root, following the branch selected by *inputs* at each node until a
    non-dict leaf is reached. A *tree* that is already a leaf is returned
    unchanged.

    Raises KeyError when *inputs* lacks an attribute the tree asks for or
    holds a value the tree has no branch for, and ValueError when an empty
    dict is met (previously an unbound-local error).
    """
    node = tree
    # Iterative descent replaces the recursion that rebound the very dict it
    # was looping over; isinstance replaces the exact type comparison.
    while isinstance(node, dict):
        if not node:
            raise ValueError('decision tree node is empty')
        attribute = next(iter(node))  # each node holds a single attribute
        node = node[attribute][inputs[attribute]]
    return node
# Ask for one value per attribute column, in column order, normalised to
# lower case like the training data.
inputs = {
    column: input(f'Enter {column} ').lower()
    for column in df.keys()[:-1]
}
print(f'The result if the user will eat is : {predict(inputs, ID3)}')
Output :
{'Outlook': {'overcast': 'yes',
'rainy': {'Windy': {'false': 'yes', 'true': 'no'}},
'sunny': {'Humidity': {'high': 'no', 'normal': 'yes'}}}}
Enter Outlook rainy
Enter Temperature MiLD
Enter Humidity HigH
Enter Windy faLse
The result if the user will eat is : yes
9. Write a python program to create a list of data and read the data
import pandas as pd
# (name, roll number) records
dataset = [
    ['John', 513],
    ['Krishna', 522],
    ['Mahesh', 525],
    ['Sandeep', 202],
    ['Akhil', 521],
    ['Swaroop', 507],
]
df = pd.DataFrame(dataset, columns = ['Name','Roll No'])

roll = int(input('Enter Roll No '))
# Names of all rows carrying the requested roll number (first match is shown).
names = df.loc[df['Roll No'] == roll, 'Name'].values
if len(names) > 0:
    print(f'Name is {names[0]}')
else:
    print("Invalid Roll No")
Output :
Enter Roll No 513
Name is John
10. Write a python program to calculate the conditional and unconditional
probabilities of an attribute
import pandas as pd
# (gender, marital status, age, salary) records
dataset = [
    ['Male', 'Single', 20, 45000],
    ['Female', 'Married', 28, 42000],
    ['Male', 'Married', 30, 60000],
    ['Male', 'Married', 44, 60000],
    ['Female', 'Single', 25, 40000],
    ['Male', 'Single', 30, 22000],
    ['Female', 'Single', 24, 30000],
]
df = pd.DataFrame(dataset, columns = ['Gender', 'Marital Status', 'Age', 'Salary'])

# Bucket the salaries and append the bucket label as a new last column.
ranges = pd.cut(df.Salary, bins = [0, 30001, 50000, 100000], labels = ['Low', 'Medium', 'High'])
df.insert(df.shape[1],'Salary ranges', ranges)

unique_ranges = df['Salary ranges'].unique()
unique_m = df['Marital Status'].unique()
r = (df['Salary ranges'].value_counts())
m = (df['Marital Status'].value_counts())

for i in unique_ranges:
    # P(salary range)
    uncond = r[i]/len(df)
    print(f'Unconditional probability of {i} : {uncond}')
    in_range = df['Salary ranges'] == i
    for j in unique_m:
        # Count rows matching both conditions with one aligned mask. The
        # previous code chained two boolean lookups on the unrelated 'Age'
        # column and relied on pandas re-aligning the second mask.
        x = int((in_range & (df['Marital Status'] == j)).sum())
        # P(salary range | marital status)
        cond = x/m[j]
        print(f'Probability of {i} salary for {j} status is : {cond}')
Output :
Unconditional probability of Medium : 0.42857142857142855
Probability of Medium salary for Single status is : 0.5

Probability of Medium salary for Married status is : 0.3333333333333333
Unconditional probability of High : 0.2857142857142857
Probability of High salary for Single status is : 0.0
Probability of High salary for Married status is : 0.6666666666666666
Unconditional probability of Low : 0.2857142857142857
Probability of Low salary for Single status is : 0.5
Probability of Low salary for Married status is : 0.0

ML Lab

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

ML Lab

Uploaded by

Copyright:

Available Formats

PROGRAMS :

# p(abs/fri) = p(fri & abs)/p(fri)

print(f'Probability that a student is absent on friday is {prob_abs_on_fri} %')

Probability that a student is absent on Friday is 15%

attendance = int(input("Enter attendance "))

marks = int(input("Enter Marks "))

distance = ((x-attendance)**2) + ((y-marks)**2)

tuple = [(dist[0][i], dist[1][i]) for i in range(0, 30)]

if Pass > Fail:

print(f'{attendance} {marks} {result}')

import matplotlib.pyplot as plt

ele = int(input("Enter the centriod "))

dist = ((meana - a)**2) + ((meanm - b)**2)

if dist < mini :

df.plot(kind = 'scatter',x = 'Attendance',y='Marks')

Enter the centroids : 2 6 13 16 26

[[2, 11, 20, 21],

[6, 1, 4, 8, 15, 17, 19, 24, 28],

[13, 5, 10, 14, 25, 26, 27],

[16, 3, 7, 12, 18, 29],

[23, 9, 22, 30]]

4. Implement linear regression using python.

import matplotlib.pyplot as plt

n += (x[i] - meanx)*(y[i] - meany)

line_x = np.linspace(xmin, xmax)

plt.plot(line_x, line_y, color = 'blue', linewidth = 4)

plt.scatter(x, y, 8, color = 'red')

print(f'The estimated median house value is : {Y}')

Enter median income : 8.44

The estimated median house value is : 395938.23

for result in unique_result :

def findprob(attribute, unique_att, dic):

for att in unique_att:

for result in unique_result:

findprob('Outlook', unique_outlook, outlook)

findprob('Temperature', unique_temp, temp)

findprob('Humidity', unique_hum, hum)

findprob('Windy', unique_windy, windy)

def calculate(o, t, h, w):

for result in unique_result:

P[result] = outlook[o+'-'+result] * temp[t+'-'+result] * hum[h+'-'+result] * windy[w+'-'+result]

final = max(zip(P.values(), P.keys()))[1]

n = int(input('Enter number of new entries '))

o = (input('Enter Outlook ')).lower()

t = (input('Enter Temperature ')).lower()

h = (input('Enter Humidity ')).lower()

w = (input('Enter Windy ')).title()

print(f'The result if the person will play is {final}')

{'no': 0.4, 'yes': 0.6}

{'high-no': 0.667, 'high-yes': 0.333, 'normal-no': 0.333, 'normal-yes': 0.667}

{'False-no': 0.333, 'False-yes': 0.667, 'True-no': 0.667, 'True-yes': 0.333}

Enter number of new entries 1

Enter Outlook sunny

Enter Temperature mild

Enter Humidity normal

Enter Windy true

The result if the person will play is no

binary = input(f'Enter 8-bit parent {i+1} ')

single = int(input('Enter single crossover point '))

double1 = int(input('Enter double crossover point 1 '))

double2 = int(input('Enter double crossover point 2 '))

def cross(a, b):

a2 = a[:double1] + b[double1:double2] + a[double2:]

distance = ((x-attendance)2) + ((y-marks)2)

dist = ((meana - a)2) + ((meanm - b)2)