Professional Documents
Culture Documents
BAR — Module 2: Unsupervised Learning
# Package setup ----
# Install only the packages that are missing, then attach them.
# (The original called install.packages() unconditionally, re-downloading
# every package on every run of the script.)
# NOTE: plyr is attached after tidyverse on purpose to keep the original
# masking order — plyr will mask several dplyr functions.
pkgs <- c(
  "arules", "arulesViz", "tidyverse", "plyr",
  "readxl", "lubridate", "ggplot2", "knitr"
)
for (pkg in pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# Quick exploration of a transactions object.
# NOTE(review): `grocery` is not created anywhere in this script — confirm it
# is loaded earlier (the CSV import below creates `Grocery`, capital G).
trObj <- as(grocery, "transactions")
summary(trObj)  # fixed: original called summary(tr); `tr` was never defined
itemFrequencyPlot(trObj, topN = 20, type = "absolute")
itemFrequencyPlot(trObj, topN = 20, type = "relative")  # fixed: was `tr`
# Market-basket analysis ----
library(arules)

# NOTE(review): read.csv() + as(, "transactions") treats each COLUMN as an
# item variable. If groceries.csv is in basket format (one transaction per
# line, items separated by commas), the correct import is
#   read.transactions("groceries.csv", sep = ",")
# — confirm the file layout before trusting these rules.
Grocery <- read.csv("groceries.csv")
View(Grocery)
str(Grocery)

## Convert into transactional data
transactional_data <- as(Grocery, "transactions")
str(transactional_data)
inspect(head(transactional_data, 2))

# Mine association rules at several support thresholds (confidence 0.5).
grocery_rules <- apriori(
  transactional_data,
  parameter = list(support = 0.01, confidence = 0.5)
)
grocery_rules1 <- apriori(
  transactional_data,
  parameter = list(support = 0.1, confidence = 0.5)
)
grocery_rules
inspect(head(sort(grocery_rules, by = "confidence"), 3))
inspect(grocery_rules1)

grocery_rules2 <- apriori(
  transactional_data,
  parameter = list(support = 0.02, confidence = 0.5)
)
inspect(head(sort(grocery_rules2, by = "confidence"), 3))

# Rules predicting "whole milk" on the right-hand side.
# Fixed: the original passed data = Groceries — the arules built-in dataset,
# which was never loaded with data(Groceries) in this script. Use the same
# transaction object as the rest of the analysis.
wholemilk_rules <- apriori(
  transactional_data,
  parameter = list(supp = 0.001, conf = 0.08),
  appearance = list(rhs = "whole milk")
)
inspect(head(sort(wholemilk_rules, by = "confidence"), 3))
# Factor analysis (principal components with rotation) ----
# Fixed: the original assigned `data` twice in a row, so the first assignment
# (AP_IMPOVERISHMENT) was dead code, immediately overwritten. It is kept
# commented out as the alternative input set.
# data <- as.data.frame(AP_IMPOVERISHMENT)
data <- DP_final[, 2:41]

library(psych)

# Six-factor extraction with varimax rotation.
# Other rotate options: "none", "varimax", "quartimax", "promax".
fit <- principal(data, nfactors = 6, rotate = "varimax")

rotated_matrix <- fit$loadings[, 1:6]  # rotated loading matrix
communality <- fit$communality         # per-item communalities
var_acc <- fit$Vaccounted              # variance accounted for per factor

# NOTE(review): hard-coded absolute Windows output paths — consider
# file.path() with a configurable output directory for portability.
write.csv(rotated_matrix, "D:/Ph.D/rot-matrix-class.csv")
write.csv(communality, "D:/Ph.D/Varimax-communality.csv")
write.csv(var_acc, "D:/Ph.D/Varimax-var-accounted.csv")
# Reliability ----
# Item-item Pearson correlations using pairwise-complete observations.
corr_matrix <- corr.test(data, use = "pairwise", method = "pearson")
item_item_corr <- corr_matrix$r

# Install psy only if missing (original installed unconditionally).
if (!requireNamespace("psy", quietly = TRUE)) {
  install.packages("psy")
}
library(psy)

# Fixed: removed attach(data) — it was never needed (mtmm() receives `data`
# directly) and attach() is a well-known source of name-collision bugs.

# Item groupings for the multitrait-multimethod matrix.
L <- list(
  c("VD-1", "VD-2.1", "VD-2.2", "VD-2.3", "VD-2.4", "VD-2.5", "VD-3", "VD-4"),
  c("VL-1.1", "VL-1.2", "VL-1.3", "VL-1.4", "VL-1.5", "VL-1.6", "VL-1.7"),
  c("VH-2", "VH-3", "VH-4", "VH-5"),
  c("VO-1", "VO-2", "VO-3")
)
mtmm(data, L)

# Cronbach's alpha for items 12-14 (check.keys reverses negatively keyed items)
cr_alpha1 <- psych::alpha(data[, 12:14], check.keys = TRUE)
cr_alpha1$total$raw_alpha
cr_alpha1$total$std.alpha  # observed value: 0.2429168
cr_alpha1$alpha.drop$std.alpha
## CLUSTER ANALYSIS ----
# Fixed: the original assigned `data` twice in a row, so the first assignment
# (AP_final) was dead code, immediately overwritten. It is kept commented out
# as the alternative input set.
# data <- AP_final[, 42:51]
data <- IMP_AP[, 42:49]
data[is.na(data)] <- 0  # replace missing values with 0 before clustering

# Partitioning-method packages: install only if missing, then attach
# (original installed unconditionally on every run).
cluster_pkgs <- c("factoextra", "cluster", "magrittr", "rlang", "ggplot2")
for (pkg in cluster_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

## Hierarchical clustering
# NOTE(review): `my_data` is not defined anywhere in this script — confirm it
# is created elsewhere (it may be intended to be `data` from above).
my_data_2 <- my_data[2:100, ]

# Average-linkage agglomeration on Euclidean distances.
# Alternative methods: "complete", "single", "ward.D2".
clusters <- hclust(dist(my_data_2), method = "average")
plot(clusters)

clusterCut <- cutree(clusters, 10)  # cut the dendrogram into 10 clusters
table(clusterCut)
Assume the data have been clustered via any technique, such as k-means, into
k clusters.
For each datum i, let a(i) be the average dissimilarity of i with all other
data within the same cluster.
Any measure of dissimilarity can be used but distance measures are the most common.
We can interpret a(i) as how well i is assigned to its cluster (the smaller
the value, the better the assignment).
We then define the average dissimilarity of point i to a cluster C as the
average of the distances from i to all points in C.
Let b(i) be the lowest average dissimilarity of i to any other cluster of
which i is not a member.
The cluster with this lowest average dissimilarity is said to be the
"neighbouring cluster" of i, because it is the next best fit cluster for
point i.
We now define: