Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 16

R Programming – Assignment

Problem 1

a)

> 27*(38-17)

[1] 567

b)

> 14^7

[1] 105413504

c)

> sqrt(436/12)

[1] 6.027714

Problem 2

> a <- seq(5, 160, by=5)  # a sequence of numbers from 5 to 160 in steps of 5

>a

[1] 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100 105 110 115 120 125 130


135 140 145 150 155 160

> b<- (87:56)

>b

[1] 87 86 85 84 83 82 81 80 79 78 77 76 75 74 73 72 71 70 69 68 67 66 65 64 63 62 61 60 59 58 57 56

> d= a*b

>d

[1] 435 860 1275 1680 2075 2460 2835 3200 3555 3900 4235 4560 4875 5180 5475 5760 6035 6300
6555 6800 7035 7260 7475 7680 7875 8060 8235 8400 8555 8700 8835 8960
a)

> d[19:21]

[1] 6555 6800 7035

b)

> d[d<2000]

[1] 435 860 1275 1680

Problem 3

a)

> sum(d)

[1] 175120

b)

> median(d)

[1] 5897.5

c)

> sd(d)

[1] 2608.563

Problem 4

a)

> student<- c("Pranay","Ankita","Himanshu","Harshit","Srishti","Nitesh","Ajay","Nisha","Anshul","Kamya")

b)

> age<-c(22,25,23,21,25,22,26,24,23,24)

c)

> student

[1] "Pranay" "Ankita" "Himanshu" "Harshit" "Srishti" "Nitesh" "Ajay" "Nisha" "Anshul"
"Kamya"
> age

[1] 22 25 23 21 25 22 26 24 23 24

d)

> x<-c(4, 7, 10)

> student[x]

[1] "Harshit" "Ajay" "Kamya"

e)

> rbind(student,age)

[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]

student "Pranay" "Ankita" "Himanshu" "Harshit" "Srishti" "Nitesh" "Ajay" "Nisha" "Anshul" "Kamya"

age "22" "25" "23" "21" "25" "22" "26" "24" "23" "24"

f)

> subject<-c("WCTB","OB","FA","FM","ME","BS","IS","OB","ME","WOC")

> rbind(student,age,subject)

[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]

student "Pranay" "Ankita" "Himanshu" "Harshit" "Srishti" "Nitesh" "Ajay" "Nisha" "Anshul" "Kamya"

age "22" "25" "23" "21" "25" "22" "26" "24" "23" "24"

subject "WCTB" "OB" "FA" "FM" "ME" "BS" "IS" "OB" "ME" "WOC"

g)

> student<-append(student,"Ankur")  # append() adds one or more values to a vector (by default at the end)

> age<-append(age,26)

> subject<-append(subject,"WCTB")

> rbind(student,age,subject)

[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
student "Pranay" "Ankita" "Himanshu" "Harshit" "Srishti" "Nitesh" "Ajay" "Nisha" "Anshul" "Kamya"
"Ankur"

age "22" "25" "23" "21" "25" "22" "26" "24" "23" "24" "26"

subject "WCTB" "OB" "FA" "FM" "ME" "BS" "IS" "OB" "ME" "WOC" "WCTB"

Problem 5

> vec<-list(1,2,3,4)  # the list() function is used to create lists

> char_vec <- list("Hadoop", "Spark", "Flink", "Mahout")

> logic_vec <- list(TRUE, FALSE, TRUE, FALSE)

> vec

[[1]]

[1] 1

[[2]]

[1] 2

[[3]]

[1] 3

[[4]]

[1] 4

> char_vec

[[1]]

[1] "Hadoop"

[[2]]

[1] "Spark"

[[3]]
[1] "Flink"

[[4]]

[1] "Mahout"

> logic_vec

[[1]]

[1] TRUE

[[2]]

[1] FALSE

[[3]]

[1] TRUE

[[4]]

[1] FALSE

> char_vec[3]

[[1]]

[1] "Flink"

> fulllist<-list(vec,char_vec,logic_vec)  # creates a list of lists named "fulllist"

> fulllist

[[1]]

[[1]][[1]]

[1] 1
[[1]][[2]]

[1] 2

[[1]][[3]]

[1] 3

[[1]][[4]]

[1] 4

[[2]]

[[2]][[1]]

[1] "Hadoop"

[[2]][[2]]

[1] "Spark"

[[2]][[3]]

[1] "Flink"

[[2]][[4]]

[1] "Mahout"

[[3]]

[[3]][[1]]

[1] TRUE
[[3]][[2]]

[1] FALSE

[[3]][[3]]

[1] TRUE

[[3]][[4]]

[1] FALSE

Problem 6

> getwd()

[1] "C:/Users/pranay/Downloads/WHO"

> str(WHO1)  # str() displays the structure of the object: the data type and the first few values of every variable

'data.frame': 4031 obs. of 50 variables:

$ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...

$ iso2 : chr "AF" "AF" "AF" "AF" ... –(chr: character data)

$ iso3 : chr "AFG" "AFG" "AFG" "AFG" ...

$ iso_numeric : int 4 4 4 4 4 4 4 4 4 4 ... –(int: integer data)

$ g_whoregion : chr "EMR" "EMR" "EMR" "EMR" ...

$ year : int 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 ...

$ e_pop_num : int 20779953 21606988 22600770 23680871 24726684 25654277 26433049


27100536 27722276 28394813 ...

$ e_inc_100k : num 190 189 189 189 189 189 189 189 189 189 ... –(num: numeric data, i.e. real numbers that may have a fractional part)

$ e_inc_100k_lo : num 123 123 122 122 122 122 122 122 122 123 ...

$ e_inc_100k_hi : num 271 271 270 270 270 270 270 270 270 270 ...

$ e_inc_num : int 39000 41000 43000 45000 47000 48000 50000 51000 52000 54000 ...
$ e_inc_num_lo : int 26000 26000 28000 29000 30000 31000 32000 33000 34000 35000 ...

$ e_inc_num_hi : int 56000 58000 61000 64000 67000 69000 71000 73000 75000 77000 ...

$ e_tbhiv_prct : num 0.32 0.34 0.21 0.26 0.28 0.31 0.32 0.4 0.39 0.41 ...

$ e_tbhiv_prct_lo : num 0.11 0.12 0.08 0.1 0.11 0.12 0.13 0.17 0.17 0.19 ...

$ e_tbhiv_prct_hi : num 0.65 0.68 0.42 0.5 0.53 0.57 0.59 0.72 0.68 0.72 ...

$ e_inc_tbhiv_100k : num 0.61 0.65 0.41 0.49 0.53 0.58 0.61 0.75 0.73 0.78 ...

$ e_inc_tbhiv_100k_lo : num 0.17 0.19 0.12 0.16 0.18 0.2 0.21 0.28 0.28 0.3 ...

$ e_inc_tbhiv_100k_hi : num 1.3 1.4 0.86 1 1.1 1.2 1.2 1.5 1.4 1.5 ...

$ e_inc_tbhiv_num : int 130 140 92 120 130 150 160 200 200 220 ...

$ e_inc_tbhiv_num_lo : int 35 41 28 37 43 51 56 75 77 86 ...

$ e_inc_tbhiv_num_hi : int 270 300 190 240 270 300 320 400 390 420 ...

$ e_mort_exc_tbhiv_100k : num 68 62 57 58 51 47 43 39 40 44 ...

$ e_mort_exc_tbhiv_100k_lo: num 39 36 33 34 30 28 25 23 24 26 ...

$ e_mort_exc_tbhiv_100k_hi: num 104 96 87 88 78 72 65 59 61 67 ...

$ e_mort_exc_tbhiv_num : int 14000 13000 13000 14000 13000 12000 11000 10000 11000 13000 ...

$ e_mort_exc_tbhiv_num_lo : int 8100 7800 7500 7900 7400 7100 6600 6200 6600 7400 ...

$ e_mort_exc_tbhiv_num_hi : int 22000 21000 20000 21000 19000 18000 17000 16000 17000
19000 ...

$ e_mort_tbhiv_100k : num 0.4 0.4 0.23 0.28 0.27 0.27 0.26 0.3 0.3 0.35 ...

$ e_mort_tbhiv_100k_lo : num 0.11 0.11 0.07 0.09 0.09 0.09 0.09 0.11 0.11 0.13 ...

$ e_mort_tbhiv_100k_hi : num 0.87 0.85 0.48 0.58 0.55 0.55 0.52 0.58 0.58 0.66 ...

$ e_mort_tbhiv_num : int 83 85 51 66 67 70 69 80 83 98 ...

$ e_mort_tbhiv_num_lo : int 22 24 15 20 22 24 24 29 31 38 ...

$ e_mort_tbhiv_num_hi : int 180 180 110 140 140 140 140 160 160 190 ...

$ e_mort_100k : num 68 63 57 58 52 47 43 39 41 45 ...

$ e_mort_100k_lo : num 39 36 33 34 30 28 25 23 24 26 ...


$ e_mort_100k_hi : num 105 96 87 89 78 72 65 59 61 68 ...

$ e_mort_num : int 14000 14000 13000 14000 13000 12000 11000 11000 11000 13000 ...

$ e_mort_num_lo : int 8200 7900 7500 8000 7500 7100 6700 6300 6700 7500 ...

$ e_mort_num_hi : int 22000 21000 20000 21000 19000 18000 17000 16000 17000 19000 ...

$ cfr : num NA NA NA NA NA NA NA NA NA NA ...

$ cfr_lo : num NA NA NA NA NA NA NA NA NA NA ...

$ cfr_hi : num NA NA NA NA NA NA NA NA NA NA ...

$ cfr_pct : int NA NA NA NA NA NA NA NA NA NA ...

$ cfr_pct_lo : int NA NA NA NA NA NA NA NA NA NA ...

$ cfr_pct_hi : int NA NA NA NA NA NA NA NA NA NA ...

$ c_newinc_100k : num 34 47 61 58 74 85 96 106 102 92 ...

$ c_cdr : num 18 25 32 31 39 45 51 56 54 49 ...

$ c_cdr_lo : num 13 17 23 22 28 32 36 39 38 34 ...

$ c_cdr_hi : num 28 38 50 48 61 70 79 87 83 75 ...

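> summary(WHO1)  # summary() shows, for each numeric variable, the minimum, 1st quartile, median, mean, 3rd quartile, maximum and the number of NA's; for character variables it shows only length, class and storage mode (this "Mode" is not the statistical mode)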

country iso2 iso3 iso_numeric

Length:4031 Length:4031 Length:4031 Min. : 4.0

Class :character Class :character Class :character 1st Qu.:212.0

Mode :character Mode :character Mode :character Median :430.0

Mean :430.6

3rd Qu.:643.0

Max. :894.0

g_whoregion year e_pop_num e_inc_100k

Length:4031 Min. :2000 Min. :1.126e+03 Min. : 0.0


Class :character 1st Qu.:2004 1st Qu.:7.431e+05 1st Qu.: 13.0

Mode :character Median :2009 Median :5.710e+06 Median : 48.0

Mean :2009 Mean :3.218e+07 Mean : 126.1

3rd Qu.:2014 3rd Qu.:2.024e+07 3rd Qu.: 163.0

Max. :2018 Max. :1.428e+09 Max. :1280.0

e_inc_100k_lo e_inc_100k_hi e_inc_num e_inc_num_lo

Min. : 0.0 Min. : 0.0 Min. : 0 Min. : 0

1st Qu.: 11.0 1st Qu.: 15.0 1st Qu.: 220 1st Qu.: 160

Median : 39.0 Median : 57.0 Median : 3000 Median : 2300

Mean : 88.2 Mean : 171.7 Mean : 50074 Mean : 33499

3rd Qu.:116.0 3rd Qu.: 221.0 3rd Qu.: 17000 3rd Qu.: 12000

Max. :908.0 Max. :1830.0 Max. :3200000 Max. :1870000

e_inc_num_hi e_tbhiv_prct e_tbhiv_prct_lo e_tbhiv_prct_hi

Min. : 0 Min. : 0.00 Min. : 0.000 Min. : 0.01

1st Qu.: 280 1st Qu.: 1.20 1st Qu.: 0.460 1st Qu.: 2.80

Median : 3700 Median : 5.70 Median : 2.600 Median : 9.85

Mean : 70631 Mean : 12.82 Mean : 8.581 Mean : 18.74

3rd Qu.: 22000 3rd Qu.: 17.00 3rd Qu.:10.000 3rd Qu.: 26.00

Max. :5240000 Max. :100.00 Max. :83.000 Max. :100.00

NA's :605 NA's :605 NA's :605

e_inc_tbhiv_100k e_inc_tbhiv_100k_lo e_inc_tbhiv_100k_hi e_inc_tbhiv_num

Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0

1st Qu.: 0.29 1st Qu.: 0.11 1st Qu.: 0.71 1st Qu.: 16.2
Median : 2.10 Median : 0.92 Median : 3.90 Median : 210.0

Mean : 38.05 Mean : 22.15 Mean : 59.11 Mean : 7214.2

3rd Qu.: 13.00 3rd Qu.: 6.50 3rd Qu.: 24.00 3rd Qu.: 1800.0

Max. :983.00 Max. :752.00 Max. :1380.00 Max. :332000.0

NA's :605 NA's :605 NA's :605 NA's :605

e_inc_tbhiv_num_lo e_inc_tbhiv_num_hi e_mort_exc_tbhiv_100k

Min. : 0.0 Min. : 0.0 Min. : 0.00

1st Qu.: 7.0 1st Qu.: 27.2 1st Qu.: 0.97

Median : 96.5 Median : 340.0 Median : 4.40

Mean : 3772.6 Mean : 12047.7 Mean : 15.28

3rd Qu.: 920.0 3rd Qu.: 3100.0 3rd Qu.: 19.00

Max. :242000.0 Max. :655000.0 Max. :188.00

NA's :605 NA's :605

e_mort_exc_tbhiv_100k_lo e_mort_exc_tbhiv_100k_hi e_mort_exc_tbhiv_num

Min. : 0.000 Min. : 0.00 Min. : 0

1st Qu.: 0.810 1st Qu.: 1.10 1st Qu.: 17

Median : 3.700 Median : 5.10 Median : 240

Mean : 9.651 Mean : 22.56 Mean : 6932

3rd Qu.: 13.000 3rd Qu.: 27.00 3rd Qu.: 1800

Max. :120.000 Max. :309.00 Max. :615000

e_mort_exc_tbhiv_num_lo e_mort_exc_tbhiv_num_hi e_mort_tbhiv_100k

Min. : 0 Min. : 0 Min. : 0.00

1st Qu.: 14 1st Qu.: 20 1st Qu.: 0.01

Median : 190 Median : 280 Median : 0.19


Mean : 5447 Mean : 8784 Mean : 11.82

3rd Qu.: 1200 3rd Qu.: 2500 3rd Qu.: 2.45

Max. :575000 Max. :656000 Max. :439.00

e_mort_tbhiv_100k_lo e_mort_tbhiv_100k_hi e_mort_tbhiv_num e_mort_tbhiv_num_lo

Min. : 0.000 Min. : 0.00 Min. : 0 Min. : 0

1st Qu.: 0.000 1st Qu.: 0.05 1st Qu.: 0 1st Qu.: 0

Median : 0.060 Median : 0.43 Median : 16 Median : 6

Mean : 6.461 Mean : 19.26 Mean : 2365 Mean : 1180

3rd Qu.: 1.000 3rd Qu.: 4.40 3rd Qu.: 330 3rd Qu.: 130

Max. :260.000 Max. :679.00 Max. :125000 Max. :63000

e_mort_tbhiv_num_hi e_mort_100k e_mort_100k_lo e_mort_100k_hi

Min. : 0 Min. : 0.00 Min. : 0.00 Min. : 0.00

1st Qu.: 1 1st Qu.: 1.10 1st Qu.: 0.87 1st Qu.: 1.30

Median : 30 Median : 4.90 Median : 4.10 Median : 5.60

Mean : 4101 Mean : 27.11 Mean : 18.06 Mean : 38.21

3rd Qu.: 580 3rd Qu.: 24.00 3rd Qu.: 17.00 3rd Qu.: 32.00

Max. :282000 Max. :538.00 Max. :333.00 Max. :791.00

e_mort_num e_mort_num_lo e_mort_num_hi cfr cfr_lo

Min. : 0 Min. : 0 Min. : 0 Min. :0.000 Min. :0.00

1st Qu.: 19 1st Qu.: 16 1st Qu.: 22 1st Qu.:0.070 1st Qu.:0.05

Median : 300 Median : 250 Median : 340 Median :0.100 Median :0.07

Mean : 9303 Mean : 7050 Mean : 11985 Mean :0.138 Mean :0.09
3rd Qu.: 2800 3rd Qu.: 1900 3rd Qu.: 3700 3rd Qu.:0.170 3rd Qu.:0.11

Max. :735000 Max. :622000 Max. :875000 Max. :0.740 Max. :0.61

NA's :3824 NA's :3824

cfr_hi cfr_pct cfr_pct_lo cfr_pct_hi c_newinc_100k

Min. :0.010 Min. : 0.00 Min. : 0.000 Min. : 1.00 Min. : 0

1st Qu.:0.080 1st Qu.: 7.00 1st Qu.: 5.000 1st Qu.: 8.00 1st Qu.: 11

Median :0.130 Median :10.00 Median : 7.000 Median :13.00 Median : 38

Mean :0.195 Mean :13.83 Mean : 8.976 Mean :19.53 Mean : 77

3rd Qu.:0.250 3rd Qu.:17.00 3rd Qu.:11.000 3rd Qu.:25.00 3rd Qu.: 97

Max. :0.940 Max. :74.00 Max. :61.000 Max. :94.00 Max. :949

NA's :3824 NA's :3824 NA's :3824 NA's :3824 NA's :150

c_cdr c_cdr_lo c_cdr_hi

Min. : 0.00 Min. : 0.0 Min. : 0.00

1st Qu.: 63.00 1st Qu.: 46.0 1st Qu.: 94.00

Median : 80.00 Median : 63.0 Median :100.00

Mean : 74.11 Mean : 60.2 Mean : 93.51

3rd Qu.: 87.00 3rd Qu.: 75.0 3rd Qu.:100.00

Max. :240.00 Max. :160.0 Max. :610.00

NA's :242 NA's :242 NA's :242


> plot(WHO1$e_inc_100k,WHO1$e_mort_exc_tbhiv_100k,xlab="Estimated No. of Incident Cases per 100k",ylab="Mortality Rate per 100k",main="WHO Data")

-(The scatter plot suggests a roughly positive (direct) relationship between the estimated number of incident cases per 100k and the mortality rate per 100k, barring a few exceptions)

> boxplot(WHO1$e_inc_100k ~ WHO1$g_whoregion,xlab="Region",ylab="Estimated No. of Incident Cases per 100k",main="WHO Data")
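-(The boxplot gives us a clear picture of the spread of incident cases in each region: the box spans the interquartile range, the line inside the box marks the median (not the mean), the whiskers show the range excluding outliers, and the outliers are shown clearly as individual dots. The position of the median within the box and the relative whisker lengths indicate the skewness of the data)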

Problem 7

a)

> x <- data.frame("Student" = c("Ron","Jake","Ava","Sophia","Mia"),"Marks" = c(35,75,45,30,85))

> str(x)

'data.frame': 5 obs. of 2 variables:

$ Student: chr "Ron" "Jake" "Ava" "Sophia" ...

$ Marks : num 35 75 45 30 85

> x$Result = ifelse(x$"Marks">50,"Pass","Fail")

> print(x)

Student Marks Result

1 Ron 35 Fail

2 Jake 75 Pass

3 Ava 45 Fail
4 Sophia 30 Fail

5 Mia 85 Pass

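b) The “matrix” function creates a matrix, here stored in the object “mymat”, with the number of rows and columns given by its “nrow” and “ncol” arguments (the matrix is initially filled with NA). “dim” returns the dimensions of the matrix as c(rows, columns), so dim(mymat)[1] is the number of rows and dim(mymat)[2] is the number of columns. In the loops, “i” is the row number and “j” is the column number, and each cell is set to i*j (a multiplication table).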

> mymat <- matrix(nrow=20, ncol=20)

> for(i in 1:dim(mymat)[1]) {

+ for(j in 1:dim(mymat)[2]) {

+ mymat[i,j] = i*j}}

> mymat[1:10, 1:10]

[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]

[1,] 1 2 3 4 5 6 7 8 9 10

[2,] 2 4 6 8 10 12 14 16 18 20

[3,] 3 6 9 12 15 18 21 24 27 30

[4,] 4 8 12 16 20 24 28 32 36 40

[5,] 5 10 15 20 25 30 35 40 45 50

[6,] 6 12 18 24 30 36 42 48 54 60

[7,] 7 14 21 28 35 42 49 56 63 70

[8,] 8 16 24 32 40 48 56 64 72 80

[9,] 9 18 27 36 45 54 63 72 81 90

[10,] 10 20 30 40 50 60 70 80 90 100

You might also like