Download as pdf or txt
Download as pdf or txt
You are on page 1 of 3

import numpy as np

import pandas as pd
# Labelled instances that are classified and scored further down the script.
person = pd.DataFrame()
# Target variable (true label, compared against the prediction when scoring)
person['Gender'] = [
    'male', 'male', 'male', 'male',
    'female', 'female', 'female', 'female',
]
# Feature variables
person['Height'] = [6, 5.92, 5.58, 5.92, 5, 5.5, 5.42, 5.75]
person['Weight'] = [180, 190, 170, 165, 100, 150, 130, 150]
person['Foot_Size'] = [12, 11, 12, 10, 6, 8, 7, 9]

# Dataset the class statistics are computed from: 20 'male' rows followed by
# 20 'female' rows, with the same columns as `person`.
data = pd.DataFrame()
data['Gender'] = ['male'] * 20 + ['female'] * 20
data['Height'] = [
    5.82, 5.77, 5.87, 5.99, 6.07, 6.13, 6.06, 5.99, 6.21, 5.81,
    5.57, 5.15, 6.02, 5.93, 5.91, 5.63, 5.86, 5.93, 5.59, 5.77,
    5.60, 5.40, 5, 5.75, 5.7, 5.2, 5.1, 5.73, 5.74, 5,
    5.8, 5.77, 5.82, 5.60, 5.40, 5, 5.75, 5.43, 5.12, 5.55,
]
data['Weight'] = [
    172, 171, 180, 163, 169, 181, 185, 168, 166, 164,
    175, 172, 167, 140, 174, 183, 133, 111, 162, 177,
    154, 134, 137, 150, 155, 136, 132, 140, 154, 146,
    141, 145, 142, 158, 155, 155, 152, 150, 139, 160,
]
data['Foot_Size'] = [
    10, 11, 12, 11, 12, 11, 12, 13, 13, 10,
    11, 13, 12, 12, 6, 7, 12, 13, 8, 9,
    7, 6, 5, 9, 5, 6, 5, 7, 6, 5,
    5, 9, 5, 7, 6, 6, 9, 12, 9, 10,
]

# View the data. Both frames are built before anything is printed because
# `data` must exist before it can be shown under the "Dataset" heading.
print("\n Dataset")
print("")
print(data)
print('\n Test Instance: ')
print(" ")
print(person)
# Number of rows of `data` in each class
n_male = data['Gender'][data['Gender'] == 'male'].count()
n_female = data['Gender'][data['Gender'] == 'female'].count()
# Total rows (count of non-null Gender entries)
total_ppl = data['Gender'].count()

# Class priors: rows in the class divided by the total number of rows
p_male = n_male / total_ppl
p_female = n_female / total_ppl

# Group the data by gender and take the mean of every remaining column;
# the result has one row per Gender value.
data_means = data.groupby('Gender').mean()
print('\n Dataset Mean')
print(" ")
print(data_means)

# Per-class variance of every remaining column.
# NOTE: pandas' var() defaults to ddof=1, i.e. sum((x - mean) ** 2) / (n - 1),
# not the population form that divides by n.
data_variance = data.groupby('Gender').var()
print(data_variance)
# Per-feature mean for the 'male' class, read as scalars from the row of
# `data_means` labelled 'male'.
male_height_mean = data_means.loc['male', 'Height']
male_weight_mean = data_means.loc['male', 'Weight']
male_footsize_mean = data_means.loc['male', 'Foot_Size']
print("male_height_mean: ", male_height_mean)
print("male_weight_mean: ", male_weight_mean)
print("male_footsize_mean: ", male_footsize_mean)

# Per-feature variance for the 'male' class, read the same way from
# `data_variance`.
male_height_variance = data_variance.loc['male', 'Height']
male_weight_variance = data_variance.loc['male', 'Weight']
male_footsize_variance = data_variance.loc['male', 'Foot_Size']
print("male_height_variance: ",male_height_variance)
print("male_weight_variance: ",male_weight_variance)
print("male_footsize_variance: ",male_footsize_variance)
# Per-feature mean for the 'female' class, read as scalars from the row of
# `data_means` labelled 'female'.
female_height_mean = data_means.loc['female', 'Height']
female_weight_mean = data_means.loc['female', 'Weight']
female_footsize_mean = data_means.loc['female', 'Foot_Size']
print("female_height_mean: ", female_height_mean)
print("female_weight_mean: ", female_weight_mean)
print("female_footsize_mean: ", female_footsize_mean)

# Per-feature variance for the 'female' class, read the same way from
# `data_variance`.
female_height_variance = data_variance.loc['female', 'Height']
female_weight_variance = data_variance.loc['female', 'Weight']
female_footsize_variance = data_variance.loc['female', 'Foot_Size']
print("female_height_variance: ",female_height_variance)
print("female_weight_variance: ",female_weight_variance)
print("female_footsize_variance: ",female_footsize_variance)
# create a function which calculates p(x|y)
def p_x_given_y(x, mean_y, variance_y):
    """Return the normal (Gaussian) density of ``x`` for the given mean and variance.

    Args:
        x: Observed feature value.
        mean_y: Mean of the feature within the class.
        variance_y: Variance of the feature within the class.

    Returns:
        ``exp(-(x - mean_y)**2 / (2 * variance_y)) / sqrt(2 * pi * variance_y)``.
    """
    # Normalising constant 1 / sqrt(2 * pi * variance)
    normaliser = 1 / (np.sqrt(2 * np.pi * variance_y))
    # Negated squared deviation from the mean, scaled by twice the variance
    exponent = (-(x - mean_y) ** 2) / (2 * variance_y)
    return normaliser * np.exp(exponent)
# Number of rows of `person` whose predicted label matches person['Gender']
count = 0
for row in person.itertuples():
    i = row.Index

    # Numerator of the posterior if the observation is a male:
    # prior times the per-feature densities under the male statistics.
    print('\n Probability male: ')
    prob_male = (
        p_male
        * p_x_given_y(row.Height, male_height_mean, male_height_variance)
        * p_x_given_y(row.Weight, male_weight_mean, male_weight_variance)
        * p_x_given_y(row.Foot_Size, male_footsize_mean, male_footsize_variance)
    )
    print(prob_male)

    # Same quantity under the female statistics.
    print('\n Probability female: ')
    prob_female = (
        p_female
        * p_x_given_y(row.Height, female_height_mean, female_height_variance)
        * p_x_given_y(row.Weight, female_weight_mean, female_weight_variance)
        * p_x_given_y(row.Foot_Size, female_footsize_mean, female_footsize_variance)
    )
    print(prob_female)

    # Pick the class with the larger numerator; a tie falls to the female branch.
    if prob_male > prob_female:
        print(f"target label: male for {i} ")
        if row.Gender == 'male':
            count += 1
    else:
        print(f"target label: Female for {i} ")
        if row.Gender == 'female':
            count += 1

# Percentage of correct predictions over however many rows `person` holds
# (previously divided by a hard-coded 8); NaN when there are no rows.
n_people = len(person)
accuracy = ((count) / n_people) * 100 if n_people else float("nan")
print(f"Accuracy {accuracy}")

You might also like