Professional Documents
Culture Documents
Q2 and Q3: Python 3
Q2 and Q3: Python 3
Python 3
Not Trusted
File
Edit
View
Insert
Cell
Kernel
Help
Code | Markdown | Raw NBConvert | Heading
In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
In [6]:
# Read the data set into `df` and echo the frame as the cell's output.
# The file name (including the "sinthetic" spelling) must match the file on
# disk exactly, so do not "correct" it here.
df = pd.read_csv(
    filepath_or_buffer="ObesityDataSet_raw_and_data_sinthetic.csv",
)
df
Out[6]:
S
G family_his F F C
He We N M S F T NObe
en Ag tory_with A C CA H CA MTR
ig igh C O C A U yesda
de e _overweig V V EC 2 LC ANS
ht t P K C F E d
r ht C C O
E
00 80 me 00 00 00 que eight_
al 000 .
00 00 o 0 tim 00 o 00 00 ntl g Level_
e 000 0
00 00 es 00 0 00 y I
29. 1. So 2. 0.0 0. So
M 53. 3 Norma
00 62 y 2. me 00 n 00 00 me Autom
5 al 000 no . no l_Wei
00 00 es 0 tim 00 o 00 00 tim obile
e 000 0 ght
00 00 es 00 0 00 es
Fe 23. 1. So 2. 1.0 0. So
55. 3 Norma
m 00 50 y 3. me 00 n 00 00 me Motor
6 000 yes . no l_Wei
al 00 00 es 0 tim 00 o 00 00 tim bike
000 0 ght
e 00 00 es 00 0 00 es
Fe 29. 1. So 2. 0.0 0.
78. 1 Obesit
1 m 00 53 y 2. me 00 n 00 00 Autom
000 no . no no y_Typ
7 al 00 00 es 0 tim 00 o 00 00 obile
000 0 e_I
e 00 00 es 00 0 00
Fe 52. 1. So 2. 0.0 0.
87. 1 Obesit
2 m 00 69 y 3. me ye 00 n 00 00 Autom
000 yes . no y_Typ
1 al 00 00 es 0 tim s 00 o 00 00 obile
000 0 e_I
e 00 00 es 00 0 00
Fe 22. 1. So 2. 1.0 0. So
60. 3 Norma
2 m 00 65 y 3. me 00 n 00 00 me Autom
000 yes . no l_Wei
2 al 00 00 es 0 tim 00 o 00 00 tim obile
000 0 ght
e 00 00 es 00 0 00 es
00 00 y 00 0 00
.. .. ..
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
. . .
8 e 00 20 6 es 05 0 08 es on
1 m 77 62 .37 me 50 25 48 me _Trans
. y_Typ
0 al 75 82 870 es 0 tim 66 o 78 41 tim portati
0 e_III
0 e 65 05 2 es 31 7 65 es on
mean 24.353090 1.702674 86.858730 2.421466 2.701179 2.004749 1.012812 0.663035
75% 26.000000 1.769491 108.015907 3.000000 3.000000 2.466193 1.678102 1.000000
max 61.000000 1.980000 173.000000 3.000000 4.000000 3.000000 3.000000 2.000000
In [14]:
# Average of the Age column.
#
# Prints the number of rows, then the arithmetic mean of df['Age'].
# Results are left in `count` (row count) and `avg` (mean age).
#
# The running total is deliberately NOT stored in a variable called `sum`:
# doing so shadows the builtin sum() for every cell executed afterwards.
ages = df['Age']

# len() counts every row, missing values included, exactly as a
# row-by-row counter would.
count = len(ages)

# skipna=False makes a missing age propagate as NaN into the total (and
# therefore into the mean) instead of being silently dropped.
age_total = ages.sum(skipna=False)

print(count)

# An empty frame yields NaN rather than raising ZeroDivisionError.
avg = age_total / count if count else float('nan')
print(avg)
2087
24.3530897973
In [54]:
# Contingency table of df: one row per Gender value, one column per SMOKE
# value; each cell holds the number of rows with that combination.
ct1 = pd.crosstab(index=df['Gender'], columns=df['SMOKE'])
ct1
Out[54]:
SMOKE     no  yes
Gender
Female  1020   15
Male    1023   29
In [52]:
# Contingency table of df: one row per MTRANS value, one column per
# NObeyesdad value; each cell holds the number of rows with that combination.
ct2 = pd.crosstab(index=df['MTRANS'], columns=df['NObeyesdad'])
ct2
Out[52]:
NObeyesdad             Insufficient_Weight  Normal_Weight  Obesity_Type_I  Obesity_Type_II  Obesity_Type_III  Overweight_Level_I  Overweight_Level_II
MTRANS
Automobile                              46             44             110               95                 1                  66                   94
Bike                                     0              4               0                1                 0                   2                    0
Motorbike                                0              6               3                0                 0                   1                    1
Public_Transportation                  215            197             236              200               323                 198                  189
Walking                                  6             31               2                1                 0                   9                    6
In [25]:
# Stacked bar chart of the Gender x SMOKE crosstab held in ct1.
#
# ct1 is indexed by Gender with one column per SMOKE value, so each bar is a
# gender and the stacked segments are the SMOKE categories (in ct1's column
# order, coloured red then blue).
ax = ct1.plot(kind='bar', stacked=True, rot=0, color=['red', 'blue'])

# The x axis carries ct1's index, which is Gender -- not the smoking status.
ax.set_xlabel('Gender')
ax.set_ylabel('count')
ax.set_title('cross-tabulation of smoke and Gender')

# Legend entries follow ct1's column order; the title says what the
# NO/Yes entries refer to.
ax.legend(['NO', 'Yes'], title='SMOKE')
plt.show()
In [30]:
# Correlation heat map over the dummy-encoded frame.
# pd.get_dummies(df) is applied first so that df.corr() is computed on the
# encoded columns; every cell of the matrix is annotated with two decimals.
df_encoded = pd.get_dummies(df)
corr_matrix = df_encoded.corr()

# Large canvas: the annotated matrix needs room to stay legible.
plt.figure(figsize=(34, 34))
sns.heatmap(
    corr_matrix,
    annot=True,
    linewidth=0.5,
    fmt='0.2f',
)
plt.show()
In [51]:
# Three side-by-side views of the Age column:
#   panel 0 - histogram of all ages (no KDE curve)
#   panel 1 - Male and Female age distributions overlaid, with KDE curves
#   panel 2 - box plot of Age split by Gender
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; confirm
# the installed version before swapping it for histplot/kdeplot.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 6))
hist_ax, gender_ax, box_ax = ax

sns.distplot(df['Age'], ax=hist_ax, kde=False)

# Same drawing order as before: Male first, then Female.
for gender in ('Male', 'Female'):
    gender_ages = df.loc[df['Gender'] == gender, 'Age']
    sns.distplot(gender_ages, ax=gender_ax, kde=True, label=gender)

sns.boxplot(x=df['Gender'], y=df['Age'], ax=box_ax)

gender_ax.legend()
plt.suptitle("Visualizing AGE column", size=20)
plt.show()