Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 24

PROGRAMS :

1. The probability that it is Friday and that a student is absent is 3 %. Since there are 5
school days in a week, the probability that it is Friday is 20 %. What is the probability that
a student is absent given that today is Friday? Apply Bayes' rule in Python to get the result.

# Given
prob_fri_and_abs = 3 / 100  # P(Friday and absent)
prob_fri = 20 / 100  # P(Friday)

# Conditional probability: P(absent | Friday) = P(Friday and absent) / P(Friday)
prob_abs_on_fri = prob_fri_and_abs / prob_fri

# prob_abs_on_fri is a fraction (0.15). It has to be scaled by 100 before it
# is printed next to a percent sign, otherwise the message reads "0.15 %".
prob_abs_on_fri_pct = prob_abs_on_fri * 100

print(f'Probability that a student is absent on friday is {prob_abs_on_fri_pct:g} %')

Output :

Probability that a student is absent on Friday is 15%


2. Implement k-nearest neighbours classification using python

import pandas as pd

def classify_knn(df, attendance, marks, k):
    """Classify one student as 'Pass' or 'Fail' by majority vote of k neighbours.

    Args:
        df: table with 'Attendance', 'Marks' and 'Result' columns.
        attendance: attendance of the student to classify.
        marks: marks of the student to classify.
        k: number of nearest rows that vote; must be between 1 and len(df).

    Returns:
        'Pass' when strictly more than half of the k nearest rows are 'Pass',
        otherwise 'Fail' (a tie therefore resolves to 'Fail').

    Raises:
        ValueError: if k is outside 1..len(df).
    """
    if not 1 <= k <= len(df):
        raise ValueError(f'k must be between 1 and {len(df)}, got {k}')

    # Squared Euclidean distance is enough for ranking; the square root would
    # not change the order. Pairing each distance with its row position makes
    # equal distances fall back to row order.
    ranked = sorted(
        ((x - attendance) ** 2 + (y - marks) ** 2, position)
        for position, (x, y) in enumerate(zip(df['Attendance'], df['Marks']))
    )

    outcomes = df['Result'].tolist()
    passes = sum(outcomes[position] == 'Pass' for _, position in ranked[:k])
    fails = k - passes
    return 'Pass' if passes > fails else 'Fail'


def main():
    """Read the data set and the query from the user, then print the verdict."""
    df = pd.read_csv("knn.csv")
    attendance = int(input("Enter attendance "))
    marks = int(input("Enter Marks "))
    k = int(input('Enter k '))
    result = classify_knn(df, attendance, marks, k)
    print(f'{attendance} {marks} {result}')


if __name__ == "__main__":
    main()

Input :

Enter attendance 90

Enter marks 25

Enter k 5

Output :

90 25 Pass
3. Implement k-means clustering using python

import pandas as pd

import matplotlib.pyplot as plt

def assign_clusters(df, seeds):
    """Group rows around seed rows in one sequential pass.

    Every row that is not a seed is visited once, in table order, and joins
    the cluster whose current mean ('Attendance', 'Marks') is closest by
    squared Euclidean distance. The mean is recomputed from the cluster's
    members at the time of the comparison, so earlier assignments influence
    later ones.

    Args:
        df: table with 'Attendance' and 'Marks' columns.
        seeds: 1-based row numbers, one per cluster.

    Returns:
        A list of clusters; each cluster is a list of 1-based row numbers
        that starts with its seed.

    Raises:
        ValueError: if a seed is outside 1..len(df).
    """
    row_count = len(df)
    for seed in seeds:
        if not 1 <= seed <= row_count:
            raise ValueError(f'centroid must be between 1 and {row_count}, got {seed}')

    attendance = df['Attendance'].tolist()
    marks = df['Marks'].tolist()
    clusters = [[seed] for seed in seeds]
    seeded = set(seeds)

    for row in range(1, row_count + 1):
        if row in seeded:
            continue
        a = attendance[row - 1]
        b = marks[row - 1]

        # Infinity as the starting bound means the first cluster always wins
        # the first comparison, whatever the scale of the data.
        nearest = 0
        nearest_dist = float('inf')
        for j, members in enumerate(clusters):
            mean_a = sum(attendance[m - 1] for m in members) / len(members)
            mean_m = sum(marks[m - 1] for m in members) / len(members)
            dist = (mean_a - a) ** 2 + (mean_m - b) ** 2
            if dist < nearest_dist:
                nearest_dist = dist
                nearest = j
        clusters[nearest].append(row)

    return clusters


def main():
    """Read five seed rows from the user, print the clusters and plot the data."""
    cluster_count = 5
    df = pd.read_csv("k means.csv")
    seeds = [int(input("Enter the centriod ")) for _ in range(cluster_count)]

    # The documented output is the cluster list, so it has to be printed.
    print(assign_clusters(df, seeds))

    df.plot(kind='scatter', x='Attendance', y='Marks')
    # Outside a notebook the figure only appears once show() is called.
    plt.show()


if __name__ == "__main__":
    main()

Input :

Enter the centroids : 2 6 13 16 23

Output :

[[2, 11, 20, 21],

[6, 1, 4, 8, 15, 17, 19, 24, 28],

[13, 5, 10, 14, 25, 26, 27],

[16, 3, 7, 12, 18, 29],

[23, 9, 22, 30]]

4. Implement linear regression using python.

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt


def fit_line(x, y):
    """Fit y = m*x + c by ordinary least squares.

    Args:
        x: one-dimensional sequence of predictor values.
        y: one-dimensional sequence of responses, same length as x.

    Returns:
        (m, c): slope and intercept as Python floats.

    Raises:
        ValueError: if the inputs are empty, differ in length, or x has no
            spread (the slope is undefined in that case).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.size == 0 or x.size != y.size:
        raise ValueError('x and y must be non-empty and of equal length')

    dx = x - x.mean()
    spread = np.sum(dx ** 2)
    if spread == 0:
        raise ValueError('all x values are identical; slope is undefined')

    m = np.sum(dx * (y - y.mean())) / spread
    c = y.mean() - m * x.mean()
    return float(m), float(c)


def main():
    """Fit house value against income, plot the fit and estimate one value."""
    df = pd.read_csv('/content/sample_data/california_housing_test.csv')
    x = df['median_income']
    y = df['median_house_value']

    m, c = fit_line(x, y)

    line_x = np.linspace(np.min(x), np.max(x))
    line_y = m * line_x + c
    plt.plot(line_x, line_y, color='blue', linewidth=4)
    plt.scatter(x, y, 8, color='red')

    X = float(input('Enter median income : '))
    Y = round((m * X + c), 2)
    print(f'The estimated median house value is : {Y}')

    # Outside a notebook the figure only appears once show() is called; it is
    # placed last so the prompt is not blocked by the plot window.
    plt.show()


if __name__ == "__main__":
    main()

Output :

Enter median income : 8.44

The estimated median house value is : 395938.23


5. Implement Naïve Bayes Classification

import pandas as pd

df = pd.read_csv('golf_df.csv')

# Class labels of the target column 'Play', in order of first appearance.
unique_result = df.Play.unique()

# tot: number of rows per class; res: prior probability of each class.
# Counts are converted to plain ints so the dictionaries print as ordinary
# Python numbers.
class_counts = df.Play.value_counts()
tot = {label: int(class_counts[label]) for label in unique_result}
res = {label: count / len(df) for label, count in tot.items()}

print(res)

def findprob(attribute, unique_att, dic):
    """Fill dic with P(attribute == value | class) for every value and class.

    Reads the module-level table df, the class labels unique_result and the
    per-class row counts tot.

    Args:
        attribute: name of the column in df to tabulate.
        unique_att: the values of that column to tabulate.
        dic: dictionary updated in place; keys are '<value>-<class>', values
            are the conditional probabilities rounded to 3 decimals.

    The finished dictionary is printed; nothing is returned.
    """
    for att in unique_att:
        has_value = df[attribute] == att
        for result in unique_result:
            # Count rows that carry this attribute value and this class.
            matches = int((has_value & (df.Play == result)).sum())
            dic[str(att) + '-' + result] = round(matches / tot[result], 3)

    print(dic)

# Distinct values of every attribute column, in order of first appearance.
unique_outlook = df.Outlook.unique()
unique_temp = df.Temperature.unique()
unique_hum = df.Humidity.unique()
unique_windy = df.Windy.unique()

# One likelihood table per attribute; findprob fills and prints each of them.
outlook, temp, hum, windy = {}, {}, {}, {}

for column, values, table in (
    ('Outlook', unique_outlook, outlook),
    ('Temperature', unique_temp, temp),
    ('Humidity', unique_hum, hum),
    ('Windy', unique_windy, windy),
):
    findprob(column, values, table)

def calculate(o, t, h, w):
    """Return the most probable class for one observation (Naive Bayes).

    Reads the module-level likelihood tables outlook, temp, hum and windy,
    the class priors res and the class labels unique_result, and stores the
    score of every class in the module-level dictionary P.

    Args:
        o: outlook value, as it appears in the keys of outlook.
        t: temperature value, as it appears in the keys of temp.
        h: humidity value, as it appears in the keys of hum.
        w: windy value, as it appears in the keys of windy.

    Returns:
        The class label with the highest score; on equal scores the label
        that compares greater wins.

    Raises:
        KeyError: if a value does not occur in its likelihood table.
    """
    for result in unique_result:
        # Score = prior * product of the per-attribute likelihoods. Without
        # the prior the comparison ignores how common each class is.
        P[result] = (
            res[result]
            * outlook[o + '-' + result]
            * temp[t + '-' + result]
            * hum[h + '-' + result]
            * windy[w + '-' + result]
        )

    final = max(zip(P.values(), P.keys()))[1]
    return final

n = int(input('Enter number of new entries '))

# Per-class scores of the most recent prediction; written by calculate().
P = {}

for i in range(n):
    # Attribute values are lower-cased and the windy flag is title-cased so
    # they match the key spelling shown in the printed likelihood tables.
    o = input('Enter Outlook ').lower()
    t = input('Enter Temperature ').lower()
    h = input('Enter Humidity ').lower()
    w = input('Enter Windy ').title()

    try:
        final = calculate(o, t, h, w)
    except KeyError as err:
        # A value that never occurs in the training table has no likelihood
        # entry; report it instead of ending the session with a traceback.
        print(f'Unknown attribute value {err}; entry skipped')
        continue

    print(f'The result if the person will play is {final}')

Output :

{'no': 0.4, 'yes': 0.6}

{'sunny-no': 0.667, 'sunny-yes': 0.222, 'overcast-no': 0.0, 'overcast-yes': 0.444, 'rainy-no': 0.333, 'rainy-yes': 0.333}

{'hot-no': 0.333, 'hot-yes': 0.222, 'mild-no': 0.5, 'mild-yes': 0.444, 'cool-no': 0.167, 'cool-yes': 0.333}

{'high-no': 0.667, 'high-yes': 0.333, 'normal-no': 0.333, 'normal-yes': 0.667}

{'False-no': 0.333, 'False-yes': 0.667, 'True-no': 0.667, 'True-yes': 0.333}

Enter number of new entries 1

Enter Outlook sunny

Enter Temperature mild

Enter Humidity normal

Enter Windy true

The result if the person will play is no


6. Implement an algorithm to demonstrate the significance of genetic algorithm

import random

#input

PARENT_COUNT = 12
GENE_LENGTH = 8

parent = []
for i in range(PARENT_COUNT):
    binary = input(f'Enter 8-bit parent {i+1} ')
    # The mutation step indexes single bits, so reject anything that is not
    # exactly eight binary digits right away.
    if len(binary) != GENE_LENGTH or set(binary) - {'0', '1'}:
        raise ValueError(f'parent {i+1} must be exactly 8 binary digits, got {binary!r}')
    parent.append(binary)

# Pair the parents at random: shuffle a copy and take neighbours two by two,
# so every parent is used exactly once.
shuffled = random.sample(parent, k=len(parent))
parent_set = [[shuffled[i], shuffled[i + 1]] for i in range(0, len(shuffled), 2)]

#crossover

single = int(input('Enter single crossover point '))
double1 = int(input('Enter double crossover point 1 '))
double2 = int(input('Enter double crossover point 2 '))

# Points outside the string, or a reversed pair of double points, would
# produce children that are not eight bits long.
if not 0 <= single <= GENE_LENGTH:
    raise ValueError(f'single crossover point must be between 0 and {GENE_LENGTH}')
if not 0 <= double1 <= double2 <= GENE_LENGTH:
    raise ValueError(
        f'double crossover points must satisfy 0 <= point 1 <= point 2 <= {GENE_LENGTH}'
    )

# Children of all pairs, four per pair, filled by cross().
crossover = []

def cross(a, b):
    """Append the four crossover children of parents a and b to crossover.

    Two children come from a single-point crossover at the module-level
    point single, and two from a two-point crossover between the
    module-level points double1 and double2.

    Args:
        a: first parent bit string.
        b: second parent bit string.

    The children are appended to the module-level list crossover in the
    order [single a/b, single b/a, double a/b/a, double b/a/b].
    """
    # Order the two points so a reversed pair still swaps a proper middle
    # segment instead of duplicating part of the string.
    lo, hi = sorted((double1, double2))

    a1 = a[:single] + b[single:]
    b1 = b[:single] + a[single:]

    a2 = a[:lo] + b[lo:hi] + a[hi:]
    b2 = b[:lo] + a[lo:hi] + b[hi:]

    crossover.extend([a1, b1, a2, b2])

# Produce four children for every parent pair.
for a, b in parent_set:
    cross(a, b)

#mutation

# Pick one child and one bit position at random. The ranges follow the
# actual data instead of fixed limits.
x = random.randrange(len(crossover))
mutate = crossover[x]
y = random.randrange(len(mutate))

# Flip the chosen bit: '0' becomes '1', anything else becomes '0'.
flipped = '1' if mutate[y] == '0' else '0'
crossover[x] = mutate[:y] + flipped + mutate[y+1:]

#print output

# Children are stored four per pair, in pair order.
for pair_no, start in enumerate(range(0, len(crossover), 4), start=1):
    children = ' '.join(crossover[start:start + 4])
    print(f'The crossover children of pair {pair_no} : {children}')

print(f'The child {x} was mutated at position {y} from {mutate} to {crossover[x]}')

Input :

Enter 8-bit parent 1 11001001

Enter 8-bit parent 2 10010011

Enter 8-bit parent 3 11001010

Enter 8-bit parent 4 01001100

Enter 8-bit parent 5 00100111

Enter 8-bit parent 6 01010011

Enter 8-bit parent 7 10011100

Enter 8-bit parent 8 01011100

Enter 8-bit parent 9 00010011

Enter 8-bit parent 10 11101100


Enter 8-bit parent 11 10100011

Enter 8-bit parent 12 11011100

Enter single crossover point 5

Enter double crossover point 1 1

Enter double crossover point 2 6

Output :

The crossover children of pair 1 : 01011100 01010011 11011011 01010100

The crossover children of pair 2 : 01000011 10101100 10100100 01001011

The crossover children of pair 3 : 00101001 11000111 11001111 00100001

The crossover children of pair 4 : 11001100 11101010 11101010 11001100

The crossover children of pair 5 : 10011100 01011100 01011100 10011100

The crossover children of pair 6 : 00010001 10010011 10010011 00010011

The child 20 was mutated at position 6 from 00010011 to 00010001


7. Implement Back-propagation algorithm

import numpy as np

# Training inputs: four samples with three features each.
x = np.array([
    [0.2, 0.8, 0.3],
    [0.8, 0.2, 0.3],
    [0.3, 0.9, 0.4],
    [0.7, 0.4, 0.6],
])

# Training targets: two output values per sample.
y = np.array([
    [0.3, 0.8],
    [0.8, 0.3],
    [0.4, 0.9],
    [0.6, 0.5],
])

learning_rate = 0.2

# Sample count and layer widths come from the data; the hidden width is fixed.
n, output_size = y.shape
input_size = x.shape[-1]
hidden_size = 3

# Weights start as zero-mean normal draws with standard deviation 0.5.
w1 = np.random.normal(0.0, 0.5, (input_size, hidden_size))
w2 = np.random.normal(0.0, 0.5, (hidden_size, output_size))

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Args:
        x: a number or array-like of numbers.

    Returns:
        A NumPy scalar or array with values between 0 and 1.
    """
    # 1 / (1 + exp(-x)) equals exp(-log(1 + exp(-x))). logaddexp evaluates
    # the logarithm without forming exp(-x), so large negative inputs do not
    # overflow.
    return np.exp(-np.logaddexp(0, -np.asarray(x, dtype=float)))

def mean_sq_error(pred, true):
    """Return half the mean squared difference between pred and true.

    Args:
        pred: predicted values (array-like).
        true: target values, broadcastable against pred.

    Returns:
        sum((pred - true)**2) / (2 * number of elements in pred).
    """
    # Accept plain lists as well as arrays.
    pred = np.asarray(pred, dtype=float)
    true = np.asarray(true, dtype=float)
    return ((pred - true) ** 2).sum() / (2 * pred.size)

def back_propogation(num):
    """Train the two-layer network for num iterations of gradient descent.

    Reads the module-level training data x and y, the sample count n and
    learning_rate, and updates the module-level weights w1 and w2 in place.

    Args:
        num: number of training iterations.

    Prints the error measured in the last forward pass (that is, before the
    final weight update). When num is 0 or less, the error of the untouched
    weights is printed instead.
    """
    global w1, w2

    mse = None
    for _ in range(num):
        #forward phase
        hidden_out = sigmoid(np.dot(x, w1))
        output_out = sigmoid(np.dot(hidden_out, w2))
        mse = mean_sq_error(output_out, y)

        #back propogation phase
        # Each delta is the layer's error multiplied by the sigmoid
        # derivative out * (1 - out).
        output_delta = (output_out - y) * output_out * (1 - output_out)
        hidden_delta = np.dot(output_delta, w2.T) * hidden_out * (1 - hidden_out)

        #update weights
        # The gradients are built from the deltas. Using the raw errors here
        # would leave the sigmoid derivative out of the update.
        w1 -= learning_rate * (np.dot(x.T, hidden_delta) / n)
        w2 -= learning_rate * (np.dot(hidden_out.T, output_delta) / n)

    if mse is None:
        # No iteration ran, so there is no measured error yet.
        mse = mean_sq_error(sigmoid(np.dot(sigmoid(np.dot(x, w1)), w2)), y)

    print(f'Mean square error : {mse}')

num = int(input('Enter number of iterations '))

back_propogation(num)

# Run one sample through the trained network.
a = [0.2, 0.8, 0.3]

hidden_in = np.dot(a, w1)
hidden_out = sigmoid(hidden_in)

output_in = np.dot(hidden_out, w2)
output_out = sigmoid(output_in)

# A bare expression is only displayed inside a notebook; print it so the
# prediction also appears when the program runs as a script.
print(output_out)

Output :

Enter number of iterations 1000

Mean square error : 0.0018373157925288387


[0.330978 0.8504427]

8. Implement Decision Tree (ID3) using python

import pandas as pd

import numpy as np

from numpy import log2 as log

# Smallest float step; added to denominators and logarithm arguments so that
# empty groups do not cause a division by zero or log2(0).
eps = np.finfo(float).eps

df = pd.read_csv('golf_df.csv')

# Lower-case every attribute column (all columns except the last) so that
# user input can be matched without regard to case.
for key in df.keys()[:-1]:
    df[key] = df[key].astype(str).str.lower()

# The last column holds the class label.
result = df.keys()[-1]
unique_result = df[result].unique()
def find_entropy(df):
    """Return the entropy (in bits) of the class column of df.

    The class column is the one named by the module-level result.

    Args:
        df: table that contains the class column.

    Returns:
        Sum over the classes present in df of -p * log2(p).
    """
    entropy = 0.0
    # Only classes that occur in this table are visited. A class that is
    # absent from a subset contributes nothing and must not be looked up.
    for fraction in df[result].value_counts(normalize=True):
        entropy -= fraction * np.log2(fraction)
    return entropy

# Weighted entropy contribution (stored with negative sign) of the most
# recently evaluated attribute values, keyed by value.
att_ent = {}


def find_entropy_attribute(df, att):
    """Return the class entropy of df after splitting on attribute att.

    Uses the module-level class column name result, the class labels
    unique_result and the constant eps. Also records each value's signed
    contribution in the module-level dictionary att_ent.

    Args:
        df: table that contains att and the class column.
        att: name of the attribute column to split on.

    Returns:
        The entropy of each branch weighted by the branch's share of rows,
        summed over all branches (non-negative).
    """
    row_count = len(df)
    ent2 = 0

    for var in df[att].unique():
        in_branch = df[att] == var
        total = int(in_branch.sum())

        # Entropy of the class column inside this branch.
        ent = 0
        for res in unique_result:
            num = int((in_branch & (df[result] == res)).sum())
            frac = num / (total + eps)
            ent += -frac * log(frac + eps)

        # Weight by the branch's share of rows. The sign is kept negative
        # here and removed by abs() on return.
        frac2 = total / row_count
        ent2 += -frac2 * ent
        att_ent[var] = -frac2 * ent

    return abs(ent2)

def find_winner(df):
    """Return the attribute of df with the highest information gain.

    Args:
        df: table whose last column is the class and whose other columns
            are candidate attributes.

    Returns:
        The name of the attribute column whose split lowers the class
        entropy the most; the first such column on equal gains.
    """
    attributes = df.keys()[:-1]
    # The entropy of the whole table is the same for every candidate.
    base_entropy = find_entropy(df)
    IG = [base_entropy - find_entropy_attribute(df, key) for key in attributes]
    return attributes[np.argmax(IG)]

def sub_table(df, node, value):
    """Return the rows of df whose column node equals value.

    Args:
        df: source table.
        node: name of the column to filter on.
        value: value the column must equal.

    Returns:
        A new table with the matching rows, renumbered from 0.
    """
    matching = df[node] == value
    return df[matching].reset_index(drop=True)

def buildtree(df, tree=None):
    """Build an ID3 decision tree for df as nested dictionaries.

    Args:
        df: table whose last column is the class and whose other columns
            are attributes.
        tree: dictionary to add the root split to; a new one is created
            when omitted.

    Returns:
        {attribute: {value: subtree_or_class, ...}} where a subtree has the
        same shape and a class is a label taken from the class column.
    """
    result = df.keys()[-1]
    winner = find_winner(df)
    attValue = np.unique(df[winner])

    if tree is None:
        tree = {}
    tree[winner] = {}

    for value in attValue:
        subtable = sub_table(df, winner, value)
        clValue, counts = np.unique(subtable[result], return_counts=True)

        if len(counts) == 1:
            # Every row in this branch has the same class: make it a leaf.
            tree[winner][value] = clValue[0]
            continue

        # The attribute just used is constant inside the branch, so it is
        # removed before recursing. This guarantees that every level has
        # fewer attributes than the one above and the recursion ends.
        remaining = subtable.drop(columns=winner)
        if remaining.shape[1] == 1:
            # No attribute is left to separate the classes: use the most
            # frequent class of the branch.
            tree[winner][value] = clValue[np.argmax(counts)]
        else:
            tree[winner][value] = buildtree(remaining)

    return tree

# Grow the decision tree from the full table, keep it for prediction, and
# show it.
ID3 = buildtree(df=df)
print(ID3)

def predict(inputs, tree):
    """Return the class the decision tree assigns to one observation.

    Args:
        inputs: mapping from attribute name to the observed value.
        tree: nested dictionaries of the form
            {attribute: {value: subtree_or_class}}.

    Returns:
        The class stored at the leaf that the observation reaches.

    Raises:
        ValueError: if a tree node is an empty dictionary.
        KeyError: if inputs lacks an attribute the tree asks for, or holds
            a value the tree has no branch for.
    """
    node = tree
    # Walk down until a non-dictionary (a class label) is reached.
    while isinstance(node, dict):
        if not node:
            raise ValueError('decision tree node has no attribute to test')
        # Each node tests exactly one attribute: its only key.
        attribute = next(iter(node))
        branches = node[attribute]
        value = inputs[attribute]
        if value not in branches:
            raise KeyError(f'no branch for {attribute}={value!r}')
        node = branches[value]
    return node

# Ask for one value per attribute column (every column except the last).
# Answers are lower-cased to match the lower-cased training table.
inputs = {
    column: input(f'Enter {column} ').lower()
    for column in df.keys()[:-1].tolist()
}

print(f'The result if the user will eat is : {predict(inputs, ID3)}')

Output :

{'Outlook': {'overcast': 'yes',

'rainy': {'Windy': {'false': 'yes', 'true': 'no'}},

'sunny': {'Humidity': {'high': 'no', 'normal': 'yes'}}}}

Enter Outlook rainy

Enter Temperature MiLD

Enter Humidity HigH

Enter Windy faLse

The result if the user will eat is : yes

9. Write a python program to create a list of data and read the data
import pandas as pd

dataset = [
    ['John', 513], ['Krishna', 522], ['Mahesh', 525],
    ['Sandeep', 202], ['Akhil', 521], ['Swaroop', 507],
]

df = pd.DataFrame(dataset, columns=['Name', 'Roll No'])


def find_name(table, roll):
    """Return the name stored for a roll number, or None when it is absent.

    Args:
        table: table with 'Name' and 'Roll No' columns.
        roll: roll number to look up.

    Returns:
        The 'Name' of the first row whose 'Roll No' equals roll, or None
        when no row matches.
    """
    names = table.loc[table['Roll No'] == roll, 'Name']
    if names.empty:
        return None
    return names.iloc[0]


if __name__ == "__main__":
    try:
        roll = int(input('Enter Roll No '))
    except ValueError:
        # Input that is not a whole number cannot match any roll number.
        roll = None

    name = None if roll is None else find_name(df, roll)
    if name is not None:
        print(f'Name is {name}')
    else:
        print("Invalid Roll No")

Output :

Enter Roll No 513

Name is John
10. Write a python program to calculate the conditional and unconditional
probabilities of an attribute

import pandas as pd

dataset = [
    ['Male', 'Single', 20, 45000], ['Female', 'Married', 28, 42000],
    ['Male', 'Married', 30, 60000], ['Male', 'Married', 44, 60000],
    ['Female', 'Single', 25, 40000], ['Male', 'Single', 30, 22000],
    ['Female', 'Single', 24, 30000],
]

df = pd.DataFrame(dataset, columns=['Gender', 'Marital Status', 'Age', 'Salary'])

# Bucket the salaries; each bin includes its upper edge.
ranges = pd.cut(df.Salary, bins=[0, 30001, 50000, 100000], labels=['Low', 'Medium', 'High'])
df.insert(df.shape[1], 'Salary ranges', ranges)


def unconditional_probability(table, salary_range):
    """Return the share of rows of table that fall in salary_range."""
    in_range = table['Salary ranges'] == salary_range
    return int(in_range.sum()) / len(table)


def conditional_probability(table, salary_range, status):
    """Return P(salary_range | marital status) estimated from table.

    Args:
        table: table with 'Salary ranges' and 'Marital Status' columns.
        salary_range: label of the salary bucket.
        status: marital status to condition on; must occur in table.

    Returns:
        Rows with both the bucket and the status, divided by rows with
        the status.
    """
    has_status = table['Marital Status'] == status
    # A single combined mask counts the rows that satisfy both conditions.
    both = has_status & (table['Salary ranges'] == salary_range)
    return int(both.sum()) / int(has_status.sum())


unique_ranges = df['Salary ranges'].unique()
unique_m = df['Marital Status'].unique()

for i in unique_ranges:
    uncond = unconditional_probability(df, i)
    print(f'Unconditional probability of {i} : {uncond}')

    for j in unique_m:
        cond = conditional_probability(df, i, j)
        print(f'Probability of {i} salary for {j} status is : {cond}')

Output :

Unconditional probability of Medium : 0.42857142857142855

Probability of Medium salary for Single status is : 0.5


Probability of Medium salary for Married status is : 0.3333333333333333

Unconditional probability of High : 0.2857142857142857

Probability of High salary for Single status is : 0.0

Probability of High salary for Married status is : 0.6666666666666666

Unconditional probability of Low : 0.2857142857142857

Probability of Low salary for Single status is : 0.5

Probability of Low salary for Married status is : 0.0

You might also like