# Professional Documents
# Culture Documents
# 21blc1084 Edalab11
# Load the clothing-review data and draw a random sample of rows to work on.
library(dplyr)

# Fix the RNG seed so the same rows are sampled on every run.
set.seed(123)

# stringsAsFactors = FALSE keeps text columns as character vectors on
# R < 4.0, where read.csv() would otherwise turn them into factors.
data <- read.csv("ClothReview.csv", stringsAsFactors = FALSE)

# Sample at most 100 rows: sample_n() raises an error when asked for more
# rows than the data frame contains.
data <- sample_n(data, min(100L, nrow(data)))
# ---- Text preprocessing ----
# install.packages("readr")
library(readr)
# install.packages("tm")
library(tm)
# install.packages("SnowballC")
library(SnowballC)

# Turn every entry of the Review.Text column into one document of a corpus.
corpus <- VCorpus(VectorSource(data$Review.Text))
corpus[[1]][1]           # show the first document before any cleaning
data$Recommended.IND[1]  # show the label that belongs to that document

# Normalise the documents: re-wrap as plain text, then lower-case them.
corpus <- tm_map(corpus, PlainTextDocument)
corpus <- tm_map(corpus, content_transformer(tolower))
corpus[[1]][1]

# Strip punctuation characters.
corpus <- tm_map(corpus, removePunctuation)
corpus[[1]][1]

# Remove the word "cloth" together with the standard English stop words.
# NOTE(review): SnowballC is loaded but no stemming step is applied here;
# confirm whether that is intended.
corpus <- tm_map(corpus, removeWords, c("cloth", stopwords("english")))
# Count how often each term occurs in each document.
frequencies <- DocumentTermMatrix(corpus)

# Drop rare terms before densifying the matrix. `sparse` has to be defined
# here because the conversion below reads it. The 0.95 threshold keeps only
# terms present in more than 5% of the documents; it is a tunable choice.
sparse <- removeSparseTerms(frequencies, 0.95)

# Convert to a plain data frame with one row per document and one column
# per retained term.
tSparse <- as.data.frame(as.matrix(sparse))

# Make the term names syntactically valid column names; unique = TRUE
# prevents two terms from collapsing onto the same column name.
colnames(tSparse) <- make.names(colnames(tSparse), unique = TRUE)

# Attach the label column taken from the sampled data.
tSparse$recommended <- data$Recommended.IND
# ---- Train a random forest and evaluate it on a held-out split ----
# install.packages("caTools")
library(caTools)
library(randomForest)
library(caret)

# Fix the RNG seed so the split and the forest are reproducible.
set.seed(123)

# Collect the label values once so that the train and test factors share
# exactly the same levels, even if one split happens to miss a class.
label_levels <- sort(unique(tSparse$recommended))

# Split on the label: TRUE rows go to training, FALSE rows to testing.
# The 70/30 ratio is a tunable choice.
in_train <- sample.split(tSparse$recommended, SplitRatio = 0.7)
trainSparse <- tSparse[in_train, , drop = FALSE]
testSparse <- tSparse[!in_train, , drop = FALSE]

# randomForest() performs classification only when the response is a factor.
trainSparse$recommended <- factor(trainSparse$recommended, levels = label_levels)
testSparse$recommended <- factor(testSparse$recommended, levels = label_levels)

# Fit on every term column and predict the held-out documents.
rf_model <- randomForest(recommended ~ ., data = trainSparse)
predictRF <- predict(rf_model, newdata = testSparse)

# Rows of the table are predictions, columns are the true labels; the
# label "1" is treated as the positive class.
confusionMatrix(table(predictRF, testSparse$recommended), positive = "1")