Professional Documents
Culture Documents
Digital Assignment-6: Read The Data
Digital Assignment-6: Read The Data
Digital Assignment-6: Read The Data
# Load the online shoppers dataset and drop rows containing missing values.
# str() is called before and after na.omit() so the two structures can be
# compared.
data <- read.csv("D:\\Onlineshoppersintention.csv")
str(data)
data <- na.omit(data)
str(data)
# Distinct values present in the Month column.
unique(data$Month)
Fix Structure of Data
# Summary statistics for the first ten columns.
summary(data[, 1:10])
# Frequency counts for each of the remaining columns.
table(data$Revenue)
table(data$Weekend)
table(data$VisitorType)
table(data$TrafficType)
table(data$Region)
table(data$Browser)
table(data$OperatingSystems)
table(data$Month)
Correlation
>library(corrplot)
corrplot 0.90 loaded
# Correlation matrix of the first ten columns, drawn as a lower-triangle
# plot (diagonal included) with square glyphs.
correlation <- cor(data[, 1:10])
corrplot(correlation, method = "square", type = "lower", diag = TRUE)
>library(ggplot2)
Warning message:
package ‘ggplot2’ was built under R version 4.0.5
>
# Scatter plot of ExitRates against BounceRates, coloured by Revenue, with a
# smoothed trend line and a fixed correlation label.
# NOTE(review): the repr.plot.* options presumably only matter in a notebook
# front end - confirm they are still needed.
options(repr.plot.width = 8, repr.plot.height = 5)
# Every `+` must end a line: a line that starts with `+` is parsed as a new
# expression and silently detaches the remaining layers from the plot.
ggplot(data = data, mapping = aes(x = BounceRates, y = ExitRates)) +
  geom_point(mapping = aes(color = Revenue)) +
  geom_smooth(se = TRUE, alpha = 0.5) +
  theme_light() +
  ggtitle("Relationship between Exit Rates and Bounce Rates") +
  xlab("Bounce Rates") +
  ylab("Exit Rates") +
  # annotate() draws the label once; geom_text() with constant aesthetics
  # would redraw it for every row of `data`.
  annotate("text", x = 0.15, y = 0.05, label = "Correlation = 0.913")
#Trend line for revenue status based on months and trend line for visitor type based on months
>library(caret)
Loading required package: lattice
Warning message:
package ‘caret’ was built under R version 4.0.5
>
# Reproducible 80/20 partition of `data` on the Revenue column.
set.seed(777)
# list = FALSE makes createDataPartition() return row indices usable directly
# in `[`.
split <- createDataPartition(data$Revenue, p = 0.8, list = FALSE)
train <- data[split, ]
test <- data[-split, ]
#Scale
>library(rpart)
>library(rpart.plot)
Warning message:
package ‘rpart.plot’ was built under R version 4.0.5
>
>
# Fit a classification tree for Revenue on all other columns, plot it, and
# show the variable importance scores.
set.seed(1)
# NOTE(review): `sample_train` is not defined in the visible part of the
# script (only `train` is) - confirm it is created in the scaling step.
model4_decision <- rpart(Revenue ~ ., data = sample_train, method = "class")
options(repr.plot.width = 10, repr.plot.height = 10)
rpart.plot(model4_decision, box.palette = "RdYlGn", shadow.col = "darkgray")
data.frame(model4_decision$variable.importance)
#metrics
# Print classification metrics for a 2x2 confusion matrix.
#
# Args:
#   x: a 2x2 table or matrix of counts. Cells are read in column-major order
#      and the formulas below treat them as
#        x[1] = true negatives,  x[2] = false negatives,
#        x[3] = false positives, x[4] = true positives.
#
# Returns:
#   Invisibly, a named numeric vector with Accuracy, ErrorRate, TPR_Recall,
#   FPR, TNR_Specificity, Precision and F1score. The same values are printed
#   with cat().
#
# Accuracy and error rate are divided by the total count in `x` itself, so
# the function is correct for any confusion matrix (train or test) and no
# longer depends on a global `train` data frame.
metrics <- function(x) {
  if (length(x) != 4L) {
    stop("metrics() expects a 2x2 confusion matrix (4 cells)", call. = FALSE)
  }
  counts <- as.vector(x)
  tn <- counts[1]
  fn <- counts[2]
  fp <- counts[3]
  tp <- counts[4]
  total <- sum(counts)

  Accuracy <- (tp + tn) / total
  ErrorRate <- (fp + fn) / total
  TPR_Recall <- tp / (fn + tp)
  FPR <- fp / (fp + tn)
  TNR_Specificity <- tn / (tn + fp)
  Precision <- tp / (fp + tp)
  F1score <- (2 * Precision * TPR_Recall) / (Precision + TPR_Recall)

  cat(
    "Accuracy = ", Accuracy, "\n",
    "Error Rate = ", ErrorRate, "\n",
    "True Positive Rate (Recall) = ", TPR_Recall, "\n",
    "False Positive Rate = ", FPR, "\n",
    "True Negative Rate (Specificity) = ", TNR_Specificity, "\n",
    "Precision = ", Precision, "\n",
    "F1Score = ", F1score
  )

  invisible(c(
    Accuracy = Accuracy,
    ErrorRate = ErrorRate,
    TPR_Recall = TPR_Recall,
    FPR = FPR,
    TNR_Specificity = TNR_Specificity,
    Precision = Precision,
    F1score = F1score
  ))
}
#prediction
Importing Packages
>library(dplyr)
>library(ggplot2)
>library(xlsx)
>library(reshape2)
>library(corrplot)
Loading Dataset
# Load the crop production table (CSV) and the rainfall workbook (first
# sheet of the .xls file).
grains <- read.csv("D:\\Table_8.3-All_India_1.csv")
# The path must stay on one line: a line break inside the string literal
# becomes part of the file name and the read fails.
rain <- read.xlsx(
  "D:\\All_India_Area_Weighted_Monthly_Seasonal_And_Annual_Rainfall.xls",
  sheetIndex = 1
)
str(grains)
# Give columns 2-36 of `grains` short crop names (selected by position), then
# compute their correlation matrix rounded to two decimals.
# NOTE(review): "chilles" looks like a typo for "chillies"; it is kept as is
# because other code may refer to the column by this name.
grains <- grains %>%
  rename(
    rice = 2, jowar = 3, bajra = 4, maize = 5, ragi = 6, millets = 7,
    wheat = 8, barley = 9, tcereals = 10, gram = 11, tur = 12,
    otherpulses = 13, totalpulses = 14, totalgrains = 15, gnuts = 16,
    sesame = 17, mustard = 18, linseed = 19, castor = 20,
    totaloilseeds = 21, cotton = 22, jute = 23, mesta = 24, tea = 25,
    coffee = 26, rubber = 27, banana = 28, sugarcane = 29, tobacco = 30,
    potatoes = 31, pepper = 32, chilles = 33, ginger = 34, coconut = 35,
    turmeric = 36
  )
cormat <- round(cor(as.matrix(grains[, 2:36])), 2)
# Blank out the upper triangle of a matrix.
#
# Args:
#   cormat: a matrix (here a correlation matrix).
#
# Returns:
#   `cormat` with every cell strictly above the diagonal set to NA; the
#   diagonal and lower triangle are unchanged.
get_lower_tri <- function(cormat) {
  cormat[upper.tri(cormat)] <- NA
  cormat
}
# Keep the lower triangle of the correlation matrix and rank the variable
# pairs by correlation value.
lowertri <- get_lower_tri(cormat)
# `melted.cormat` is used below but never assigned in the visible script.
# Reconstructed here as the long form of `lowertri`: melt() on a matrix
# yields one row per cell with the cell value in a `value` column, which is
# the column the ordering below relies on.
# NOTE(review): confirm this matches the original, omitted definition.
melted.cormat <- melt(lowertri, na.rm = TRUE)
sorted.cormat <- melted.cormat[order(melted.cormat$value), ]
# First six rows of the ascending sort.
neg.cormat <- head(sorted.cormat, 6)
# NOTE(review): rows 140-170 of the ascending sort are taken as the
# "positive" set; the bounds are hard-coded - confirm they fit this matrix.
pos.cormat <- sorted.cormat[140:170, ]
head(rain)
Combined EDA
>
# Keep only the first 14 rows of the crop table.
grains.new <- grains[1:14, ]
>
>
>
Maize crop production vs annual rainfall
library(DT)
# Read the CWUR file and keep only the rows whose year is 2015.
data <- subset(read.csv("E:\\cwurData.csv"), year == 2015)
str(data)
# Number of rows per country, largest count first.
height <- sort(table(data[["country"]]), decreasing = TRUE)
# Show the `usa` table, then build `usaEmployment`: the rows of `usa` ordered
# by alumni_employment with an employment-rank column appended.
# NOTE(review): `usa` is not assigned in the visible script - presumably the
# USA subset of `data`; confirm.
datatable(usa)
# NOTE(review): no object named `c` or `regline` is assigned in the visible
# script; as written `c` would resolve to base::c. Presumably both are fitted
# models from omitted lines - confirm before running.
abline(c)
summary(c)
summary(regline)
# Sort by alumni_employment (ascending, the order() default).
usaEmployment <- usa[order(usa$alumni_employment),]
# NOTE(review): `usaEmploymentRank` is not defined in the visible script.
usaEmployment<-cbind(usaEmployment,usaEmploymentRank)
library(DT)
# Scatter plots of employment rank against three other ranking columns, each
# followed by a fitted line.
# NOTE(review): `string` (the y-axis label) and `c` (passed to abline() and
# summary()) are not defined in the visible script - confirm they are created
# earlier, and that `c` is refitted before each abline() call.
plot(usaEmployment$national_rank, usaEmployment$usaEmploymentRank,
     xlab = "National Rank", ylab = string,
     main = "National Rank vs Employment Rank")
abline(c)
# The title is kept on a single line: a line break inside the string literal
# would appear in the plot title.
plot(usaEmployment$quality_of_education, usaEmployment$usaEmploymentRank,
     xlab = "Quality of Education ", ylab = string,
     main = "Quality of Education vs Employment Rank")
abline(c)
plot(usaEmployment$quality_of_faculty, usaEmployment$usaEmploymentRank,
     xlab = "Quality of Faculty", ylab = string,
     main = "Quality of Faculty vs Employment Rank")
abline(c)
summary(c)
# Multiple linear regression of employment rank on national rank, quality of
# education and quality of faculty, followed by the model summary.
linReg <- lm(
  formula = usaEmploymentRank ~
    national_rank + quality_of_education + quality_of_faculty,
  data = usaEmployment
)
summary(linReg)