Professional Documents
Culture Documents
Data Science Programming Lab Assessment-6: Importing The Packages and Loading The Dataset
Data Science Programming Lab Assessment-6: Importing The Packages and Loading The Dataset
Data Science Programming Lab Assessment-6: Importing The Packages and Loading The Dataset
E.UTHESH GANAPATHY
LAB ASSESSMENT-6
QUESTION
# Read the customer CSV into data frame `A` and preview its first rows.
A <- read.csv(file = "C:/Users/Hariharan/Documents/CSV/CUSTOMER.csv")
head(x = A)

# Number of missing values in each column (named integer vector).
vapply(A, function(column) sum(is.na(column)), integer(1))

# Drop every row that contains at least one NA, then inspect the structure
# of what remains and the distinct values of the MONTH column.
A <- na.omit(object = A)
str(object = A)
unique(A[["MONTH"]])
19MID0056
E.UTHESH GANAPATHY
# Descriptive statistics for the first two columns of `A`.
summary(A[, 1:2])

# Frequency tables, one per column, using exact-match `[[` extraction.
table(A[["REVENUE"]])
table(A[["WEEKEND"]])
table(A[["VISITOR_TYPE"]])
table(A[["TRAFFIC_TYPE"]])
table(A[["REGION"]])
table(A[["BROWSER"]])
table(A[["OS"]])
table(A[["MONTH"]])
19MID0056
E.UTHESH GANAPATHY
STEP 3 – VISUALIZING
library(corrplot)
library(ggplot2)

# Correlation matrix of the first five columns of `A`, drawn as the lower
# triangle with the diagonal included.
# NOTE(review): cor() requires these columns to be numeric - confirm.
correlation <- cor(A[, 1:5])
corrplot(correlation, method = "square", type = "lower", diag = TRUE)

# Plot size hint; the repr.* options are read by notebook front ends.
options(repr.plot.width = 8, repr.plot.height = 5)

# Compute the coefficient shown on the plot instead of hard-coding it, so
# the label always matches the data actually loaded.
bounce_exit_cor <- cor(A$BOUNCE_RATES, A$EXIT_RATES)

# Scatter of bounce rate vs exit rate coloured by REVENUE, with a smoothed
# trend line.
# - `inherit.aes` is a layer argument, not an aesthetic, so it no longer
#   appears inside aes(); geom_smooth() relies on the inherited x/y mapping.
# - annotate() draws the label once; geom_text(aes(...)) with constant
#   coordinates drew one copy of the label per data row.
ggplot(A, mapping = aes(x = BOUNCE_RATES, y = EXIT_RATES)) +
  geom_point(mapping = aes(color = REVENUE)) +
  geom_smooth(se = TRUE, alpha = 0.5) +
  theme_light() +
  ggtitle("RELATION BW BOUNCE AND EXIT RATES") +
  xlab("BOUNCE RATES") +
  ylab("EXIT RATES") +
  annotate(
    "text",
    x = 0.15,
    y = 0.05,
    label = sprintf("Correlation = %.2f", bounce_exit_cor)
  )
19MID0056
E.UTHESH GANAPATHY
library(ggplot2)
# grid.arrange() lives in gridExtra, which was never attached; without this
# line the final call fails with "could not find function".
library(gridExtra)

# Cross-tabulation of revenue status against visitor type.
table(A$REVENUE, A$VISITOR_TYPE)

# Plot size hint; one call covers both charts (the values were identical).
options(repr.plot.width = 10, repr.plot.height = 6)

# Bar chart of REVENUE counts, stacked by visitor type.
p1 <- ggplot(A, mapping = aes(x = REVENUE)) +
  geom_bar(mapping = aes(fill = VISITOR_TYPE)) +
  theme_light() +
  ggtitle("REVENUE BASED VISITOR TYPE") +
  xlab("REVENUE STATUS(0/1)") +
  ylab("VISITORS") +
  theme(legend.position = "bottom")

# Bar chart of REVENUE counts, stacked by weekend flag.
p2 <- ggplot(A, mapping = aes(x = REVENUE)) +
  geom_bar(mapping = aes(fill = WEEKEND)) +
  theme_light() +
  ggtitle("REVENUE BASED WEEKEND STATUS") +
  xlab("REVENUE STATUS (0/1)") +
  ylab("VISITORS") +
  theme(legend.position = "bottom")

# Show both charts side by side.
grid.arrange(p1, p2, nrow = 1)
19MID0056
E.UTHESH GANAPATHY
# Plot size hint; one call covers both charts (the values were identical).
options(repr.plot.width = 8, repr.plot.height = 5)

# Bar chart of REVENUE counts with OS as the fill aesthetic.
# NOTE(review): if OS is stored as a number, ggplot2 treats the fill as
# continuous and will not split the bars - confirm the column type.
p1 <- ggplot(A, mapping = aes(x = REVENUE)) +
  geom_bar(mapping = aes(fill = OS)) +
  theme_light() +
  ggtitle("RELATIONSHIP BW OS AND REVENUE") +
  xlab("REVENUE") +
  ylab("OS") +
  theme(legend.position = "bottom")

# Bar chart of REGION counts with REVENUE as the fill aesthetic.
# - Stored as `p2` so it no longer overwrites the OS chart before that
#   chart is ever drawn.
# - legend.position must be lower-case; "BOTTOM" is not an accepted value.
p2 <- ggplot(A, mapping = aes(x = REGION)) +
  geom_bar(mapping = aes(fill = REVENUE)) +
  theme_light() +
  ggtitle("RELATIONSHIP BW REGION AND REVENUE") +
  xlab("REGION") +
  ylab("REVENUE") +
  theme(legend.position = "bottom")

# Assigning a ggplot to a variable does not draw it; render both explicitly.
print(p1)
print(p2)
19MID0056
E.UTHESH GANAPATHY
library(dplyr)
library(ggplot2)
library(xlsx)
library(reshape2)
library(corrplot)
# Load the two datasets for this exercise: agriculture records into `A`
# and crop records into `B`.
A <- read.csv(file = "C:/Users/Hariharan/Documents/CSV/AGRICULTURE.csv")
B <- read.csv(file = "C:/Users/Hariharan/Documents/CSV/CROPS.csv")

# Column types and sample values of A, then the first rows of B.
str(object = A)
head(x = B)
19MID0056
E.UTHESH GANAPATHY
library(DT)
# Load the course dataset into `A`, replacing any earlier contents.
A <- read.csv(file = "C:/Users/Hariharan/Documents/CSV/COURSE.csv")

# Preview the first rows, then show column types and sample values.
head(x = A)
str(object = A)
19MID0056
E.UTHESH GANAPATHY
# Reload the course dataset and reduce it to the country and national-rank
# columns of (at most) its first 1000 rows.
A <- read.csv("C:/Users/Hariharan/Documents/CSV/COURSE.csv")

# head() stops at the last available row. Indexing with A[1:1000, ] would
# instead append all-NA rows whenever the file has fewer than 1000 rows.
A <- head(A, 1000)

# Namespace-qualified so another attached package that also exports
# select() cannot mask the dplyr verb.
A <- dplyr::select(A, COUNTRY, NATIONAL_RANK)

# Show the first five rows and the resulting dimensions.
head(A, 5)
dim(A)