Data Science Programming Lab Assessment-6: Importing The Packages and Loading The Dataset

19MID0056
E.UTHESH GANAPATHY
DATA SCIENCE PROGRAMMING
LAB ASSESSMENT-6
QUESTION
1. WRITE AN R CODE TO PERFORM BEHAVIOUR ANALYSIS OF

CUSTOMERS FOR ANY ONLINE PURCHASE MODEL. CONSIDER
ANY DATA SET
STEP 1 - IMPORTING THE PACKAGES AND LOADING THE DATASET
# Read the customer CSV and take a first look at it.
A <- read.csv(file = "C:/Users/Hariharan/Documents/CSV/CUSTOMER.csv")
head(A)
# Number of missing values in each column.
colSums(is.na(A))
# Drop every row that contains at least one missing value.
A <- na.omit(A)
str(A)
# Distinct values present in the MONTH column.
unique(A$MONTH)
19MID0056
E.UTHESH GANAPATHY
STEP 2 – FIXING THE STRUCTURE OF DATA AND USING

DESCRIPTIVE ANALYSIS
# Recode the TRUE/FALSE flags as 1/0.
# The earlier gsub(FALSE, 0, ...) form only worked because gsub() silently
# coerces a logical pattern and a numeric replacement to character; converting
# through as.logical() states the intent directly and yields the same 0/1
# values once the columns are turned into factors below.
A$REVENUE <- as.integer(as.logical(A$REVENUE))
A$WEEKEND <- as.integer(as.logical(A$WEEKEND))

# MONTH becomes an ordered factor using the level order listed here.
A$MONTH <- factor(
  A$MONTH,
  levels = c("Feb", "Mar", "May", "June", "Jul", "Aug",
             "Sep", "Oct", "Nov", "Dec"),
  ordered = TRUE
)

# The remaining categorical columns become plain (unordered) factors.
A$OS <- factor(A$OS)
A$BROWSER <- factor(A$BROWSER)
A$REGION <- factor(A$REGION)
A$TRAFFIC_TYPES <- factor(A$TRAFFIC_TYPES)
A$VISITOR_TYPE <- factor(A$VISITOR_TYPE)
A$REVENUE <- factor(A$REVENUE)
A$WEEKEND <- factor(A$WEEKEND)
str(A)
19MID0056
E.UTHESH GANAPATHY
# Five-number summary of the first two columns of A.
summary(A[, 1:2])

# Frequency table for each categorical column.
table(A$REVENUE)
table(A$WEEKEND)
table(A$VISITOR_TYPE)
# Use the full column name: `A$TRAFFIC_TYPE` only resolved through `$`
# partial matching against TRAFFIC_TYPES, which is fragile.
table(A$TRAFFIC_TYPES)
table(A$REGION)
table(A$BROWSER)
table(A$OS)
table(A$MONTH)
19MID0056
E.UTHESH GANAPATHY
STEP 3 – VISUALIZING
# Correlation matrix of the first five columns of A, drawn as a lower-triangle
# square plot.
library(corrplot)
correlation <- cor(A[, 1:5])
corrplot(correlation, method = "square", type = "lower", diag = TRUE)

library(ggplot2)
options(repr.plot.width = 8, repr.plot.height = 5)

# Bounce rate vs exit rate, points coloured by REVENUE, with a smoothed trend.
# Changes from the previous version:
#   * `inherit.aes = FALSE` was inside aes(), where it is treated as an
#     aesthetic mapping rather than a layer option; it has been removed.
#   * the label is drawn with annotate() so it is placed once instead of once
#     per data row (geom_text() with constant aesthetics repeats it per row).
#   * the title is a single line again (a line wrap had put a newline in it).
ggplot(A, mapping = aes(x = BOUNCE_RATES, y = EXIT_RATES)) +
  geom_point(mapping = aes(color = REVENUE)) +
  geom_smooth(se = TRUE, alpha = 0.5) +
  theme_light() +
  ggtitle("RELATION BW BOUNCE AND EXIT RATES") +
  xlab("BOUNCE RATES") +
  ylab("EXIT RATES") +
  annotate("text", x = 0.15, y = 0.05, label = "Correlation = 0.91")
19MID0056
E.UTHESH GANAPATHY
library(ggplot2)

# Cross-tabulation of revenue status against visitor type.
table(A$REVENUE, A$VISITOR_TYPE)

# Bar chart of revenue status, stacked by visitor type.
p1 <- ggplot(A, mapping = aes(x = REVENUE)) +
  geom_bar(mapping = aes(fill = VISITOR_TYPE)) +
  theme_light() +
  ggtitle("REVENUE BASED VISITOR TYPE") +
  xlab("REVENUE STATUS(0/1)") +
  ylab("VISITORS") +
  theme(legend.position = "bottom")

# Bar chart of revenue status, stacked by weekend flag.
# NOTE(review): the opening of this expression was missing from the file
# (it began at "WEEKEND)) + ..."); it is restored here by analogy with p1 and
# the grid.arrange(p1, p2) call below -- confirm against the original script.
p2 <- ggplot(A, mapping = aes(x = REVENUE)) +
  geom_bar(mapping = aes(fill = WEEKEND)) +
  theme_light() +
  ggtitle("REVENUE BASED WEEKEND STATUS") +
  xlab("REVENUE STATUS (0/1)") +
  ylab("VISITORS") +
  theme(legend.position = "bottom")

# grid.arrange() lives in gridExtra, which is never attached in this file, so
# it is called through its namespace.
gridExtra::grid.arrange(p1, p2, nrow = 1)
19MID0056
E.UTHESH GANAPATHY
# NOTE(review): this is only the tail of a ggplot expression -- the opening
# (presumably a ggplot(A, ...) + geom_bar(mapping = aes(fill = ... call) is
# missing from the file, so these two lines do not parse on their own.
# The visible part applies the light theme, sets the title and axis labels,
# and puts the legend at the bottom. Restore the head from the original script.
OS)) + theme_light() + ggtitle("RELATIONSHIP BW OS AND REVENUE") +
xlab("REVENUE") + ylab("OS") + theme(legend.position = "bottom")
# Bar chart of sessions per REGION, stacked by revenue status.
# Fixes: legend.position must be lower-case ("bottom"; "BOTTOM" is not a
# recognised position), and the title is a single line again (a line wrap had
# put a newline inside the string).
# NOTE(review): p1 is only assigned here, never printed -- the statement that
# displays it may have been lost; confirm against the original script.
p1 <- ggplot(A, mapping = aes(x = REGION)) +
  geom_bar(mapping = aes(fill = REVENUE)) +
  theme_light() +
  ggtitle("RELATIONSHIP BW REGION AND REVENUE") +
  xlab("REGION") +
  ylab("REVENUE") +
  theme(legend.position = "bottom")
19MID0056
E.UTHESH GANAPATHY
2. WRITE AN R CODE TO PERFORM AGRICULTURAL DATA

ANALYSIS FOR YIELD PREDICTION AND CROP SELECTION ON
INDIAN TERRAIN DATA SET.
STEP 1 – IMPORTING THE DATASET
library(dplyr)
library(ggplot2)
library(xlsx)
library(reshape2)
library(corrplot)
# Load the two input tables: A from AGRICULTURE.csv, B from CROPS.csv.
A <- read.csv(file = "C:/Users/Hariharan/Documents/CSV/AGRICULTURE.csv")
B <- read.csv(file = "C:/Users/Hariharan/Documents/CSV/CROPS.csv")
# Column types of A, then the first rows of B.
str(A)
head(B)
19MID0056
E.UTHESH GANAPATHY
STEP 2 – RENAMING THE DATA
# Give columns 2..36 of A short crop names (selected by position), in one
# rename() call instead of several chained ones.
A <- A %>%
  rename(
    rice = 2, jowar = 3, bajra = 4, maize = 5, ragi = 6, millets = 7,
    wheat = 8, barley = 9, tcereals = 10, gram = 11, tur = 12,
    otherpulses = 13, totalpulses = 14, totalgrains = 15, gnuts = 16,
    sesame = 17, mustard = 18, linseed = 19, castor = 20,
    totaloilseeds = 21, cotton = 22, jute = 23, mesta = 24, tea = 25,
    coffee = 26, rubber = 27, banana = 28, sugarcane = 29, tobacco = 30,
    potatoes = 31, pepper = 32, chilles = 33, ginger = 34, coconut = 35,
    turmeric = 36
  )

# Pairwise correlations between columns 2..15, rounded to two decimals.
cormat <- round(cor(as.matrix(A[, 2:15])), digits = 2)

# Return `cormat` with every entry strictly above the main diagonal set to NA.
# The diagonal and the lower triangle are left unchanged.
get_lower_tri <- function(cormat) {
  replace(cormat, upper.tri(cormat), NA)
}
# Keep the lower triangle of the correlation matrix, reshape it to long form
# (NA cells dropped), then sort the rows by ascending correlation value.
lowertri <- get_lower_tri(cormat)
melted.cormat <- melt(lowertri, na.rm = TRUE)
sorted.cormat <- melted.cormat[order(melted.cormat$value), ]
STEP 3 – VISUALIZING THE DATA
# First six rows of the ascending-sorted table, i.e. the lowest correlations.
neg.cormat <- head(sorted.cormat, n = 6)

# Tile plot of those pairs, filled by correlation value, red tile borders.
ggplot(neg.cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "red")
19MID0056
E.UTHESH GANAPATHY
# Rows 50 to 80 of the ascending-sorted correlation table.
# NOTE(review): the window 50:80 is hard-coded; whether it really holds the
# positive correlations depends on the data -- confirm.
pos.cormat <- sorted.cormat[50:80, ]

# Tile plot of those pairs, filled by correlation value, red tile borders.
ggplot(pos.cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "red")
STEP 4 – COMBINING 2 DATASETS AND VISUALIZING THE DATA
# Combine A and B side by side by matching on row names (by = 0), keeping
# unmatched rows from either table; [-1] drops the first column of the merged
# result (the key column merge() adds when joining on row names).
AB <- merge(
  data.frame(A, row.names = NULL),
  data.frame(B, row.names = NULL),
  by = 0, all = TRUE
)[-1]

# Long format with one row per (ANN, crop) pair for rice and maize.
# Argument names are spelled out in full: `id` / `measure` only worked through
# partial matching of melt()'s `id.vars` / `measure.vars`.
C <- melt(AB, id.vars = "ANN", measure.vars = c("rice", "maize"))

# ANN on the x axis against the melted value, one colour per crop.
ggplot(data = C, aes(x = ANN, y = value, color = variable)) +
  geom_point() +
  xlab("ANNUAL RAINFALL") +
  ylab("GRAIN PRODUCTION")
19MID0056
E.UTHESH GANAPATHY
# Scatter plot of the mesta column against ANN.
ggplot(data = AB, mapping = aes(x = ANN, y = mesta)) +
  geom_point() +
  xlab("ANNUAL RAINFALL") +
  ylab("CROP PRODUCTION")

# Scatter plot of the maize column against ANN.
ggplot(data = AB, mapping = aes(x = ANN, y = maize)) +
  geom_point() +
  xlab("ANNUAL RAINFALL") +
  ylab("GRAIN PRODUCTION")
19MID0056
E.UTHESH GANAPATHY
3. WRITE AN R CODE TO DEVELOP A RECOMMENDER SYSTEM FOR

ANY REAL-WORLD PROBLEM (WHEN A USER QUERIES TO FIND
THE UNIVERSITY THAT OFFERS PYTHON, THE SYSTEM SHOULD
DISPLAY RANK WISE LIST OF THE UNIVERSITY BASED ON THE
REVIEW GIVEN BY THE CUSTOMERS)
STEP 1 - IMPORTING THE PACKAGES AND LOADING THE DATASET
# Packages for the recommender section. The later steps call functions whose
# packages were never attached in this section, so they are loaded here.
library(DT)
library(dplyr)     # select(), left_join(), arrange(), desc()
library(stringr)   # str_replace_all(), str_split(), str_c()
library(arules)    # apriori() and the "transactions" class
library(arulesViz) # plot() method for association rules (method = "graph")

# Load the course/university table and inspect it.
A <- read.csv("C:/Users/Hariharan/Documents/CSV/COURSE.csv")
head(A)
str(A)
19MID0056
E.UTHESH GANAPATHY
STEP 2 – VISUALIZING THE DATA
# Number of rows per country, largest first; bar chart of the top five.
height <- sort(table(A$COUNTRY), decreasing = TRUE)
barplot(height[1:5], las = 3, main = "TOP COUNTRIES RANKING IN 2014")

# RANK against FACULTY_QUALITY with the fitted straight line overlaid.
plot(A$FACULTY_QUALITY, A$RANK,
     xlab = "FACULTY QUALITY", ylab = "RANK",
     main = "FACULTY QUALITY VS RANK")
# The model used to be stored in a variable called `c`, which masks base::c();
# it now has a descriptive name.
faculty_fit <- lm(RANK ~ FACULTY_QUALITY, data = A)
abline(faculty_fit)
19MID0056
E.UTHESH GANAPATHY
# RANK against INFLUENCE with the fitted straight line overlaid.
plot(A$INFLUENCE, A$RANK,
     xlab = "INFLUENCE", ylab = "RANK",
     main = "INFLUENCE VS RANK")
# Models are no longer stored in a variable called `c` (it masks base::c()).
influence_fit <- lm(RANK ~ INFLUENCE, data = A)
abline(influence_fit)

# RANK against CITATIONS with the fitted straight line overlaid.
plot(A$CITATIONS, A$RANK,
     xlab = "CITATIONS", ylab = "RANK",
     main = "CITATIONS VS RANK")
citations_fit <- lm(RANK ~ CITATIONS, data = A)
abline(citations_fit)
19MID0056
E.UTHESH GANAPATHY
STEP 3 – USING APRIORI ALGORITHM
# Reload the course table and keep rows 1..1000 and the two columns used for
# rule mining.
A <- read.csv(file = "C:/Users/Hariharan/Documents/CSV/COURSE.csv")
A <- A[seq_len(1000), ]
A <- select(A, COUNTRY, NATIONAL_RANK)
head(A, n = 5)
dim(A)

# One transaction per NATIONAL_RANK value, containing the COUNTRY entries that
# share that rank.
matrix1 <- as(split(A[["COUNTRY"]], A[["NATIONAL_RANK"]]), "transactions")

matrix1

# Mine rules with the thresholds given in ruleParameters and summarise them.
ruleParameters <- list(supp = 0.01, conf = 0.75, maxlen = 2)
associationRules <- apriori(matrix1, parameter = ruleParameters)
summary(associationRules)
19MID0056
E.UTHESH GANAPATHY
# Fix the random seed before drawing the rules graph.
# `set.seed(240)` and `plot(...)` used to share one line with no separator,
# which is a syntax error in R; they are separate statements now.
set.seed(240)
plot(associationRules, method = "graph", measure = "support",
     shading = "lift", main = "Association Rules Graph")
# Turn each rule string into "lhs,rhs" text and split it into two columns.
# NOTE(review): `$` is used on associationRules as if it were a data frame with
# a `rules` column; it is the direct result of apriori() -- confirm a
# conversion step (e.g. to a data frame) is not missing before this point.
rules <- sapply(associationRules$rules, function(x){

# Take the text from the first "{" to the last "}", then strip the braces.
x = gsub("[\\{\\}]", "", regmatches(x, gregexpr("\\{.*\\}", x))[[1]])
# Replace the rule arrow with a comma and remove all spaces.
x = gsub("=>",",",x)
x = str_replace_all(x," ","")
return( x )
})
rules <- as.character(rules)

# Split on "," and store the first / second piece as the left / right side.
rules <- str_split(rules, ",")
associationRules$movieLeftSide <- sapply( rules, "[[", 1)
associationRules$movieRightSide <- sapply( rules, "[[", 2)
# NOTE(review): both sides are coerced to numeric, but the left side is joined
# against COUNTRY below -- presumably a text column, in which case the
# coercion yields NA and nothing matches; verify.
associationRules$movieLeftSide <- as.numeric(associationRules$movieLeftSide)
associationRules$movieRightSide <- as.numeric(associationRules$movieRightSide)
associationRules$rules <- NULL
# Attach the columns of A to each rule, matching the left side on COUNTRY.
associationRules <- associationRules %>% left_join(A, by = c("movieLeftSide" =
"COUNTRY"))
associationRules$movieLeftSide <- NULL

# Prefix column 5 and columns 7..25 with "Left_".
# NOTE(review): the `movie*` names and the fixed positions 5 and 7:25 look
# carried over from a different dataset; A was reduced to two columns by the
# earlier select(A, COUNTRY, NATIONAL_RANK) -- confirm these indices exist.
columnNames <- colnames(associationRules)
columnNames[5] <- str_c("Left_", columnNames[5])
columnNames[7:25] <- str_c("Left_", columnNames[7:25])
colnames(associationRules) <- columnNames
# Attach the columns of A again, matching the right side on NATIONAL_RANK.
associationRules <- associationRules %>% left_join(A, by = c("movieRightSide" =
"NATIONAL_RANK"))
19MID0056
E.UTHESH GANAPATHY
# Drop the right-side join key now that it is no longer needed.
associationRules$movieRightSide <- NULL

# Prefix columns 26..45 with "Right_" and print the resulting column names.
# NOTE(review): positions 26:45 are hard-coded -- confirm the table really has
# that many columns at this point.
columnNames <- colnames(associationRules)
columnNames[26:45] <- str_c("Right_", columnNames[26:45])
colnames(associationRules) <- columnNames
colnames(associationRules)
# Sort the rules by descending lift.
associationRules <- arrange(associationRules, desc(lift))

# Keep only the display columns.
# NOTE(review): no visible step creates title/year columns (A only holds
# COUNTRY and NATIONAL_RANK), so Left_title, Left_year, Right_title and
# Right_year presumably do not exist here -- verify the intended names.
associationRules <- select(associationRules, Left_title, Left_year, Right_title,
Right_year, support, confidence, lift)
head(associationRules)

Data Science Programming Lab Assessment-6: Importing The Packages and Loading The Dataset

Uploaded by

Copyright:

Available Formats

You might also like

Data Science Programming Lab Assessment-6: Importing The Packages and Loading The Dataset

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Data Science Programming Lab Assessment-6: Importing The Packages and Loading The Dataset

Uploaded by

Copyright:

Available Formats

19MID0056

DATA SCIENCE PROGRAMMING

1. WRITE AN R CODE TO PERFORM BEHAVIOUR ANALYSIS OF

STEP 1 - IMPORTING THE PACKAGES AND LOADING THE DATASET

STEP 2 – FIXING THE STRUCTURE OF DATA AND USING

A$REVENUE <- gsub(FALSE, 0, A$REVENUE)

2. WRITE AN R CODE TO PERFORM AGRICULTURAL DATA

STEP 1 – IMPORTING THE DATASET

STEP 2 – RENAMING THE DATA

A <- A %>% rename(rice = 2, jowar = 3, bajra = 4, maize = 5, ragi = 6, millets = 7,

cormat <- round(cor(as.matrix(A[,2:15])),2)

STEP 3 – VISUALIZING THE DATA

neg.cormat <- head(sorted.cormat, 6)

pos.cormat <- sorted.cormat[c(50:80),]

STEP 4 – COMBINING 2 DATASETS AND VISUALIZING THE DATA

AB<-merge(data.frame(A, row.names=NULL), data.frame(B, row.names=NULL),

C <- melt(AB, id = "ANN", measure = c("rice", "maize"))

ggplot(AB, aes(x = ANN, y = mesta)) + geom_point()+ xlab("ANNUAL RAINFALL") +

ggplot(AB, aes(x = ANN, y = maize)) + geom_point()+ xlab("ANNUAL RAINFALL") +

3. WRITE AN R CODE TO DEVELOP A RECOMMENDER SYSTEM FOR

STEP 1 - IMPORTING THE PACKAGES AND LOADING THE DATASET

STEP 2 – VISUALIZING THE DATA

height<- sort(table(A$COUNTRY), decreasing = TRUE)

plot (A$FACULTY_QUALITY, A$RANK, xlab ="FACULTY QUALITY", ylab =

plot (A$INFLUENCE, A$RANK, xlab ="INFLUENCE", ylab="RANK", main =

plot (A$CITATIONS, A$RANK, xlab = "CITATIONS", ylab = "RANK", main =

STEP 3 – USING APRIORI ALGORITHM

matrix1 <- as(split(A[ , "COUNTRY"], A[ , "NATIONAL_RANK"]), "transactions")

set.seed(240) plot(associationRules, method = "graph", measure = "support",

rules <- sapply(associationRules$rules, function(x){

rules <- as.character(rules)

associationRules$movieLeftSide <- NULL

associationRules$movieRightSide <- NULL

associationRules <- arrange(associationRules, desc(lift))

You might also like