Download as pdf or txt
Download as pdf or txt
You are on page 1of 8

CSE1006 Foundations for Data Analytics ELA
Fall 2022-2023 Semester
Lab-8 (L45+L46) and (L39+L40)

1. USE DIABETES.CSV (Diabetes Dataset Link: https://www.kaggle.com/datasets/mathchi/diabetes-data-set)

2. DISPLAY THE DATAFRAME.

3. HOW MANY ROWS AND COLUMNS ARE THERE?

4. FIND OUT THE COLUMN NAMES IN THE DATAFRAME.

5. ACCESS THE AGE COLUMN.

6. DISPLAY THE NUMBER OF PEOPLE WHOSE AGE IS GREATER THAN 40.

7. FIND OUT THE FEMALE DIABETIC PATIENTS OF AGE > 30.

8. FIND OUT THE DETAILS OF PATIENTS WHO ARE NOT FROM Louisa.

9. IF THE GLUCOSE LEVEL IN BLOOD IS > 7, DIAGNOSE AS DIABETIC BY ADDING A COLUMN TO THE DATAFRAME.

10. WHICH FEMALE SUBJECTS FROM BUCKINGHAM ARE UNDER THE AGE OF 25?

11. WHAT IS THEIR AVERAGE GLYHB?

12. ARE ANY OF THEM DIABETIC?

13. FIND OUT EACH COLUMN TYPE IN THE DATAFRAME.

14. PRODUCE THE SUMMARY OF THE DATAFRAME.

15. Create the following dataframe, afterwards invert Sex for all individuals.

16. Create this dataframe (make sure you import the variable Working as character and not factor). Add this dataframe column-wise to the previous one.

a) How many rows and columns does the new dataframe have?

b) What class of data is in each column?

17. Check what class of data the (built-in dataset) state.center is, and convert it to a dataframe.

18. Create a simple dataframe from 3 vectors. Order the entire dataframe by the first column.

19. Create a function that, given a vector and an integer, will return how many times the integer appears inside the vector.

Read the following content and practice the examples in the lab. Submit the code and output of the examples as the assignment.
dplyr is a powerful R package to manipulate, clean and summarize unstructured data. In short, it makes data exploration and data manipulation easy and fast in R.

What's special about dplyr?
The package "dplyr" comprises many functions that perform the most commonly used data manipulation operations, such as applying filters, selecting specific columns, sorting data, adding or deleting columns, and aggregating data. Another important advantage of this package is that dplyr functions are very easy to learn and use. They are also easy to recall. For example, filter() is used to filter rows.

Important dplyr Functions to remember

| dplyr Function | Description                   | Equivalent SQL |
|----------------|-------------------------------|----------------|
| select()       | Selecting columns (variables) | SELECT         |
| filter()       | Filter (subset) rows.         | WHERE          |
| group_by()     | Group the data                | GROUP BY       |
| summarise()    | Summarise (or aggregate) data | -              |
| arrange()      | Sort the data                 | ORDER BY       |
| join()         | Joining data frames (tables)  | JOIN           |
| mutate()       | Creating New Variables        | COLUMN ALIAS   |

filter() method
The filter() function is used to produce the subset of the data that satisfies the condition specified in the filter() method. In the condition, we can use conditional operators, logical operators, NA values, range operators, etc. to filter out data.
The syntax of the filter() function is given below:
filter(dataframeName, condition)

# Example: keep only the rows of a data frame that satisfy a condition.

# import dplyr package
library(dplyr)

# create a data frame of players with their runs and wickets
# (one wickets value is deliberately missing)
stats <- data.frame(
  player = c("A", "B", "C", "D"),
  runs = c(100, 200, 408, 19),
  wickets = c(17, 20, NA, 5)
)

# fetch players who scored more than 100 runs
# (the comparison is strict, so the player with exactly 100 is excluded)
filter(stats, runs > 100)

distinct() method
The distinct() method removes duplicate rows from a data frame, either across all columns or based on the specified columns. The syntax of the distinct() method is given below:
distinct(dataframeName, col1, col2, ..., .keep_all = TRUE)
# Example: remove duplicate rows, either on all columns or on one column.

# import dplyr package
library(dplyr)

# create a data frame; the first and last rows are identical, and
# player "A" appears three times
stats <- data.frame(
  player = c("A", "B", "C", "D", "A", "A"),
  runs = c(100, 200, 408, 19, 56, 100),
  wickets = c(17, 20, NA, 5, 2, 17)
)

# remove duplicate rows (rows that match on every column)
distinct(stats)

# remove duplicates based on a column; .keep_all = TRUE keeps the other
# columns of the first row found for each player
distinct(stats, player, .keep_all = TRUE)

arrange() method
In R, the arrange() method is used to order the rows based on a specified column. The syntax of the arrange() method is specified below:
arrange(dataframeName, columnName)
# Example: order the rows of a data frame by one column.

# import dplyr package
library(dplyr)

# create a data frame
stats <- data.frame(
  player = c("A", "B", "C", "D"),
  runs = c(100, 200, 408, 19),
  wickets = c(17, 20, NA, 5)
)

# ordered data based on runs (ascending by default)
arrange(stats, runs)

select() method
The select() method is used to extract the required columns as a table by specifying the required column names in the select() method. The syntax of the select() method is mentioned below:
select(dataframeName, col1, col2, …)


# Example: extract only the required columns of a data frame.

# import dplyr package
library(dplyr)

# create a data frame
stats <- data.frame(
  player = c("A", "B", "C", "D"),
  runs = c(100, 200, 408, 19),
  wickets = c(17, 20, NA, 5)
)

# fetch required column data (the runs column is left out)
select(stats, player, wickets)
rename() method
The rename() function is used to change the column names. This can be done with the syntax below:
rename(dataframeName, newName = oldName)
# Example: change the name of a column.

# import dplyr package
library(dplyr)

# create a data frame
stats <- data.frame(
  player = c("A", "B", "C", "D"),
  runs = c(100, 200, 408, 19),
  wickets = c(17, 20, NA, 5)
)

# renaming the column: the new name goes on the left, the old on the right
rename(stats, runs_scored = runs)

mutate() & transmute() methods
These methods are used to create new variables. The mutate() function creates new variables without dropping the old ones, but the transmute() function drops the old variables and creates new variables. The syntax of both methods is mentioned below:
mutate(dataframeName, newVariable = formula)
transmute(dataframeName, newVariable = formula)
# Example: create a new column with mutate() and with transmute().

# import dplyr package
library(dplyr)

# create a data frame
stats <- data.frame(
  player = c("A", "B", "C", "D"),
  runs = c(100, 200, 408, 19),
  wickets = c(17, 20, 7, 5)
)

# add new column avg (existing columns are kept)
mutate(stats, avg = runs / 4)

# drop all existing columns and create a new column
transmute(stats, avg = runs / 4)

summarize() method
Using the summarize() method we can summarize the data in the data frame by using aggregate functions like sum(), mean(), etc. The syntax of the summarize() method is specified below:
summarize(dataframeName, aggregate_function(columnName))
# Example: collapse a data frame to aggregate values.

# import dplyr package
library(dplyr)

# create a data frame
stats <- data.frame(
  player = c("A", "B", "C", "D"),
  runs = c(100, 200, 408, 19),
  wickets = c(17, 20, 7, 5)
)

# summarize method: total and average of the runs column, in one row
summarize(stats, sum(runs), mean(runs))

DESCRIPTIVE STATISTICS
(HISTOGRAM)

Create data for the graph
x = 9, 13, 21, 8, 36, 22, 12, 41, 31, 33, 19

Create the histogram.
main indicates the title of the chart.

col is used to set the color of the bars.

border is used to set the border color of each bar.

xlab is used to give a description of the X-axis.

Program:
# Draw a histogram of the sample values in x.

# data for the graph
x <- c(9, 13, 21, 8, 36, 22, 12, 41, 31, 33, 19)

# col fills the bars, border outlines them, xlab labels the X-axis and
# main sets the chart title
hist(
  x,
  col = "blue",
  border = "red",
  xlab = "Weight",
  main = "Histogram of X in R programming"
)
out
put
DESCRIPTIVE STATISTICS
(BAR DIAGRAM)
Create data for the chart

| Expenditure | Wages | Materials | Taxation | Profits | Administration |
|-------------|-------|-----------|----------|---------|----------------|
| Company     | 250   | 220       | 360      | 130     | 40             |

Create the Bar Diagram.
main indicates the title of the chart.

col is used to set the color of the bars.

border is used to set the border color of each bar.

xlab is used to give a description of the X-axis.

ylab is used to give a description of the Y-axis.

names.arg is a vector of names appearing under each bar.
Program:
# Draw a bar diagram of company expenditure by category.

# bar heights (amounts) and the category name shown under each bar;
# the two vectors are matched by position
x <- c(250, 220, 360, 130, 40)
y <- c("Wages", "Materials", "Taxation", "Profits", "Admin")

# names.arg supplies the per-bar labels; xlab/ylab label the axes
barplot(
  x,
  names.arg = y,
  xlab = "Expenditure",
  ylab = "Company",
  col = "green",
  border = "orange",
  main = "Expenditure Chart"
)

DESCRIPTIVE STATISTICS
(PIE Chart)

Create data for the graph

| Flavor         | Apple | Mango | Pineapple | Strawberry | Orange | Grapes |
|----------------|-------|-------|-----------|------------|--------|--------|
| Sales Quantity | 500   | 650   | 450       | 580        | 350    | 400    |

Create the colors for the graph
Create the Pie Chart.
main indicates the title of the chart.

col is used to set the color of the slices.

Create the legend for the graph
Program:
# Draw a pie chart of sales quantity per flavor, with a legend.

# sales quantities and the flavor name for each slice (matched by position)
x <- c(500, 650, 450, 580, 350, 400)
labels <- c("Apple", "Mango", "Pineapple", "Strawberry", "Orange", "Grapes")

# one color per slice; the original listed a seventh color ("blue") that no
# slice could use, so it is dropped to keep colors and slices aligned
cols <- c("red", "green", "yellow", "pink", "orange", "violet")

pie(x, labels = labels, col = cols, main = "Sales Quantity")

# reuse the same labels and colors so the legend cannot drift out of sync
# with the slices; cex shrinks the legend text
legend("topright", legend = labels, cex = 0.5, fill = cols)

You might also like