# --- Scraped-page artifacts, not R code (kept as comments so the file parses) ---
# Original site-navigation headers: "Professional Documents", "Culture Documents", "HRM"
# Student marks workbook: build vectors, index them, handle NA, assemble a data frame.
# NOTE(review): the original transcript prints `grades` before any assignment —
# the defining line was evidently lost in extraction. Reconstructed here as one
# letter grade per student, consistent with its later use as a length-10
# character vector (L15, L18, L25) — TODO confirm against the original session.
grades <- c("B", "C", "F", "A", "D", "C", "B", "F", "A", "D")
grades
marks <- c(86, 78, 45, 90, 59, 78, 87, 44, 98, 60)
grades
rno <- 1:10                  # roll numbers 1..10
rno
marks <- marks + 2           # 2 grace marks for every student
marks
slab <- marks / 10           # marks rescaled into tens
slab
grades[3]                    # single-element indexing
slab[c(3, 5)]                # indexing with an index vector
marks[8:10]                  # range indexing
gf <- as.factor(grades)      # character -> factor
gf
as.numeric(gf)               # underlying integer level codes, NOT the marks
rno <- c(rno, 11)            # append an 11th student with unknown marks/grade
rno
marks <- c(marks, NA)
marks
grades <- c(grades, NA)
grades
anyNA(marks)                 # TRUE — an NA was just appended
mean(marks)                  # NA: mean() propagates missing values...
mean(marks, na.rm = TRUE)    # ...unless they are explicitly removed
d <- data.frame(rno, grades, marks)
d
head(d)
tail(d)
grades[5]
# Assignment operators, scalar classes, and a small sports data frame.
x <- 5
x
x <- 5              # `<-` is the conventional R assignment operator
x
x <- y <- 5         # chained assignment: both names end up bound to 5
x
y <- 5L             # the L suffix makes an integer, not a double
class(y)            # "integer"
class(x)            # "numeric"
x + y
x - y
x == y              # TRUE — integer 5 equals double 5
x <- 1:10
x
y <- -5:4
y
z <- c("hockey", "football", "basketball", "cricket", "tennis",
       "hockey", "football", "basketball", "cricket", "tennis")
z
# FIX: the original named this variable `unique`, which masks base::unique().
sports_factor <- as.factor(z)
sports_factor
nchar(z)            # character count of each sport name
l <- data.frame(x, y, z)
l
class(l)
# Element-wise arithmetic on two aligned length-5 vectors, plus dates,
# structure inspection, and NA-aware means.
mtcars                      # built-in motor-trend data set (datasets package)
x <- 1:5
x
y <- -2:2
y
data.frame(x, y)
x + y                       # arithmetic operators act element-wise
x - y
x * y
x / y                       # y contains 0, so one entry is Inf
x^2
sqrt(x)
sqrt(y)                     # negative inputs yield NaN (with a warning)
date1 <- as.Date("2019-11-15")
date1
dim(l)                      # `l` is the data frame built earlier in this script
str(l)
x <- c(1, 2, NA, 3, 4, NA)
mean(x)                     # NA — missing values propagate
mean(x, na.rm = TRUE)       # 2.5 once the NAs are dropped
TRUE * 10                   # logicals coerce to 1/0 in arithmetic
FALSE * 5
# mtcars exploration with base indexing, then dplyr pipelines.
# FIX: the original used %>% / select / filter (L95, L102) before
# library(dplyr) was loaded at the bottom of the section — load it up front.
library(dplyr)
mtcars
dim(mtcars)
row.names(mtcars)           # car models are stored as row names
colnames(mtcars)
mtcars[, 4]                 # 4th column as a vector
mtcars[2, ]                 # 2nd row
mtcars[8, 4]                # single cell
head(mtcars)
tail(mtcars)
mtcars[3, 4:5]              # row 3, columns 4-5
mean(mtcars$mpg)
summary(mtcars)
View(mtcars)                # interactive viewer (RStudio only)
mtcars
# Low-mpg 4-cylinder cars ordered by displacement, with the mean displacement
# attached; the derived column is now named (the original left it unnamed).
mtcars %>%
  select(disp, mpg, cyl) %>%
  filter(mpg < 22, cyl == 4) %>%
  arrange(disp) %>%
  mutate(mean_disp = mean(disp))
mtcars
View(mtcars)
data(mtcars)                # reload the pristine copy of the data set
mtcars
sum(mtcars$mpg >= 21)       # count of cars at/above 21 mpg
sum(mtcars$mpg == 21)
mtcars %>% select(mpg, cyl) %>% filter(mpg >= 21)
mtcars %>% select(am, mpg) %>% filter(mpg > 24, am == 1)
# airquality: locating and working around missing values, then hflights.
airquality
View(airquality)
anyNA(airquality)                        # TRUE — Ozone and Solar.R contain NAs
mean(airquality$Ozone)                   # NA: demonstrates NA propagation
mean(airquality$Ozone, na.rm = TRUE)
median(airquality$Solar.R, na.rm = TRUE)
airquality[4, 4]
airquality %>% select(Temp, Day) %>% filter(Day == 4)
airquality %>% select(Wind, Day) %>% filter(Day == 11)
airquality %>% select(Temp, Day, Solar.R) %>% filter(Day == 4, Solar.R >= 313)
airquality %>% select(Temp, Day, Month) %>% filter(Day == 4, Month == 5)
# Arrange Ozone in descending order; temperature details for the 6th month
airquality %>%
  select(Temp, Month, Ozone) %>%
  filter(Month == 6) %>%
  arrange(desc(Ozone))
# FIX: `hflights` is not a base data set — its package was never loaded.
library(hflights)
hflights
hf <- hflights
# Data frame of cricket players, then a dplyr select/filter.
player <- c("sachin", "sehwag", "virat", "dhoni", "yuvraj",
            "raina", "bumrah", "shami", "ashwin", "bhuvneshwar")
player
score <- c(99, 209, 183, 156, 143, 56, 33, 10, 5, 16)
score
matchesplayed <- c(234, 209, 123, 235, 123, 108, 45, 56, 34, 123)
matchesplayed
# NA marks players whose dismissal count is unknown
dismissals <- c(134, 180, 98, 189, 100, 98, 40, NA, 32, NA)
dismissals
tournament <- rep("ICC WC", 10)   # idiomatic replacement for 10 written-out copies
tournament
l <- data.frame(player, score, matchesplayed, dismissals, tournament)
l
# Apply select and filter: players having more than 150 runs
l %>% select(player, score) %>% filter(score > 150)
# DISTRIBUTION DIAGRAMS on the ggplot2 diamonds data set.
library(ggplot2)   # FIX: diamonds and ggplot() need ggplot2, never loaded here
diamonds
# NOTE(review): a histogram's y-axis is always a count/frequency, so the
# original request ("price on y-axis") cannot be satisfied by hist(); the
# ylab below mislabels the count axis as "price".
hist(diamonds$carat, xlab = "carat", ylab = "price", main = "chandan")
# Scatterplot — this is the plot that genuinely puts price on the y-axis
plot(diamonds$carat, diamonds$price)
# Boxplot
boxplot(diamonds$carat, diamonds$price)
summary(diamonds$carat)
# ggplot2: carat histograms faceted by diamond colour grade
ggplot(diamonds, aes(x = carat)) + geom_histogram() + facet_wrap(~color)
# mtcars: prepare mpg histograms, then split by transmission type (am).
mtcars
hist(mtcars$mpg, xlab = "mpg", ylab = "cyl", main = "chandan")
ggplot(mtcars, aes(x = mpg)) + geom_histogram() + facet_wrap(~am)
# binwidth sets the bar width, geom_point draws scatter points, alpha fades
# the dots, and geom_violin draws a violin-shaped graph.
# (FIX: the second line of this comment lost its leading "#" in the original
# and would have been a syntax error.)
iris
head(iris)
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(binwidth = .5, fill = "blue")
# FIX: in the original the chain was split so the continuation line began
# with "+"; R then treats the first line as complete and the "+..." line as a
# separate, invalid expression. All layers are now one unbroken chain.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
  geom_point(alpha = 1) +
  geom_boxplot() +
  geom_violin(alpha = 0.2) +
  facet_wrap(~Species) +
  ggtitle("Flower Data")
# PIE CHART: a stacked single bar transformed to polar coordinates.
# coord_polar() turns the bar into a pie; a plain geom_bar stays a bar chart.
G1 <- LETTERS[1:5]                 # slice labels A..E
G1
V <- c(33, 45, 65, 43, 87)         # slice sizes
V
D <- data.frame(G1, V)
D
ggplot(D, aes(x = "", y = V, fill = G1)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  ggtitle("PIECHART")
# CORRELATION matrices visualised with ggcorrplot and corrplot.
library(ggcorrplot)   # FIX: neither plotting package was ever loaded
library(corrplot)
mtcars
cor(mtcars)                  # pairwise Pearson correlations of all columns
View(cor(mtcars))
# FIX: `q` was never assigned in the original — every ggcorrplot call below
# would have failed with "object 'q' not found".
q <- cor(mtcars)
ggcorrplot(q, method = "circle")
ggcorrplot(q, method = "square")
ggcorrplot(q, method = "square", type = "lower")
ggcorrplot(q, method = "square", type = "upper")
corrplot.mixed(q, upper = "square", lower = "number")
# diamonds: correlate only columns 5:10 (the numeric measurements —
# cor() would error on the factor columns)
diamonds
d <- diamonds
z <- d[, 5:10]
b <- cor(z)
b
corrplot(b, method = "square", type = "lower")
corrplot.mixed(b, upper = "ellipse", lower = "number")
# NOTE(review): this whole section is a verbatim duplicate of the correlation
# section immediately above it in the original transcript; kept as-is, with
# the same fixes applied so it runs independently.
library(ggcorrplot)
library(corrplot)
mtcars
cor(mtcars)
View(cor(mtcars))
q <- cor(mtcars)   # FIX: `q` was never assigned in the original
ggcorrplot(q, method = "circle")
ggcorrplot(q, method = "square")
ggcorrplot(q, method = "square", type = "lower")
ggcorrplot(q, method = "square", type = "upper")
corrplot.mixed(q, upper = "square", lower = "number")
diamonds
d <- diamonds
z <- d[, 5:10]     # numeric measurement columns only
b <- cor(z)
b
corrplot(b, method = "square", type = "lower")
corrplot.mixed(b, upper = "ellipse", lower = "number")
# FORECAST: time-series basics, CSV import, and an ARIMA fit.
library(forecast)   # FIX: auto.arima()/forecast() are from the forecast
                    # package, which was never loaded in the original
austres
# FIX: dim() on a ts object returns NULL; length() and frequency() are the
# meaningful size queries for a time series.
length(austres)
frequency(austres)
# IMPORTING DATASET — assign the result (the original read it and discarded it)
imported <- read.csv(file.choose(), header = TRUE, stringsAsFactors = FALSE)
rainfall <- c(234, 678, 234, 243, 567, 345, 242, 890, 356, 234, 567, 234)
rainfall2 <- c(789, 678, 453, 346, 987, 341, 890, 427, 341, 543, 987, 341)
length(rainfall)    # FIX: dim() on an atomic vector is NULL; length() works
str(rainfall)
# Two years of monthly rainfall side by side ("cominedrainfall" typo fixed;
# the variable is not referenced anywhere else)
combinedrainfall <- matrix(c(rainfall, rainfall2), nrow = 12)
combinedrainfall
# NOTE(review): `Airp` is never defined in this transcript; presumably it is
# the AirPassengers data set — confirm against the original session.
Airp <- AirPassengers
arimafit <- auto.arima(Airp)
arimafit
predictingfuture <- forecast(arimafit, h = 60)   # 60 periods ahead
plot(predictingfuture)
# US accidental deaths: inspect the monthly ts, rebuild it over the same
# window, fit an ARIMA model, and forecast 48 months ahead.
USAccDeaths
usd <- USAccDeaths
usd
start(usd)
end(usd)
frequency(usd)          # 12 — monthly observations
US <- ts(usd, start = c(1973, 1), end = c(1978, 12), frequency = 12)
US
usdeath <- auto.arima(usd)   # requires the forecast package
usdeath
fcast <- forecast(usdeath, h = 48)   # ("forcast" typo fixed; not used elsewhere)
fcast
plot(fcast)
# SLICE COMMAND: pick rows by position with dplyr::slice (rows 1-5, 8, 15-20).
PlantGrowth
P <- PlantGrowth
P
P %>% slice(1:5, 8, 15:20)
# Making the data stationary: log() steadies the variance and diff() removes
# the trend / time dependencies.
# (FIX: the second line of this comment, "dependencies", lost its leading "#"
# in the original and would have been a syntax error.)
# NOTE(review): `Airp` is never defined in this transcript; presumably the
# AirPassengers data set — confirm against the original session.
Airp <- AirPassengers
Airp
plot(Airp)
plot(log(Airp))
plot(diff(log(Airp)))
fit <- auto.arima(log(Airp), approximation = FALSE, trace = FALSE)
fit
prediction <- predict(fit, n.ahead = 36)
prediction
# Back-transform from the log scale ("antilog").
# FIX: the original used 2.718^x, a low-precision approximation of e^x;
# exp() is the exact inverse of log().
newpred <- round(exp(prediction$pred), 0)
newpred
ts.plot(Airp, newpred, col = c("blue", "green"))
# Linear model: Rape-vs-Assault trend plot, then a multiple regression
# predicting Rape from the other USArrests variables.
USArrests
str(USArrests)
ggplot(USArrests, aes(x = Rape, y = Assault)) +
  geom_smooth(method = "lm") +
  geom_point()
lm(Rape ~ Murder + Assault + UrbanPop, data = USArrests)
##################################################################################################
#######################################################################################
# SUPERVISED LEARNING: k-nearest-neighbours spam classifier (kernlab's spam data).
library(kernlab); data("spam")
library(caret)   # FIX: createDataPartition(), train() and confusionMatrix()
                 # are caret functions, but caret was never loaded
spam
str(spam)
dim(spam)
set.seed(23456)                       # reproducible 75/25 split
intrain <- createDataPartition(y = spam$type, p = 0.75, list = FALSE)
intrain
training1 <- spam[intrain, ]
training1
testing1 <- spam[-intrain, ]          # hold-out rows NOT in the training index
testing1
dim(testing1)
dim(training1)
model1 <- train(type ~ ., data = training1, method = "knn")
model1
pred <- predict(model1, newdata = testing1)
pred
confusionMatrix(pred, testing1$type)  # accuracy etc. on the hold-out set
# UNSUPERVISED LEARNING prep: keep only the numeric iris features.
# FIX: the original first assigned `ir <- iris` and immediately overwrote it —
# a dead store; the label column (5, Species) is now dropped directly.
ir <- iris[, -5]
ir
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
  geom_point(alpha = 1)
# TEXT MINING: read an article, build and clean a corpus, draw a word cloud.
library(tm)          # FIX: Corpus/tm_map/stopwords are tm functions — never loaded
library(wordcloud)   # FIX: wordcloud() was never loaded either
intel <- readLines(file.choose())
intel
articleintel <- Corpus(VectorSource(intel))
articleintel
articleintel <- tm_map(articleintel, removeNumbers)
articleintel <- tm_map(articleintel, removePunctuation)
stopwords()          # show the default English stopword list
articleintel <- tm_map(articleintel, removeWords, stopwords())
# Extra ad-hoc removals — NOTE(review): some of these may already be covered
# by the default stopword list above; harmless either way.
articleintel <- tm_map(articleintel, removeWords, c("its", "will", "has", "the"))
wordcloud(articleintel, random.order = FALSE)
## Create a TDM (term-document matrix), keep frequent terms, and bar-plot them.
tdm <- TermDocumentMatrix(articleintel)
tdm
class(tdm)
tdm <- as.matrix(tdm)
tdm
View(tdm)
# Total occurrences of each term across documents
# (tdm is already a matrix here; the original wrapped it in as.matrix() again)
termfreq <- rowSums(tdm)
termfreq
View(termfreq)
## Subsetting the TDM: keep terms appearing at least twice
termfreqsubset <- subset(termfreq, termfreq >= 2)
termfreqsubset
View(termfreqsubset)
## Creating a data frame of term/frequency pairs
tdmf <- data.frame(term = names(termfreqsubset), freq = termfreqsubset)
tdmf
row.names(tdmf) <- NULL
View(tdmf)
# FIX: in the original the "+theme(...)" continuation started on its own line,
# so the theme layer was never attached (and errored as a stand-alone
# expression); the chain is now unbroken.
tdmplot <- ggplot(tdmf, aes(x = term, y = freq)) +
  geom_bar(stat = "identity") +
  xlab("Terms") +
  ylab("Count") +
  coord_flip() +
  theme(axis.text = element_text(size = 6))
tdmplot
## SENTIMENT ANALYSIS with the NRC emotion lexicon.
library(syuzhet)   # FIX: get_nrc_sentiment() comes from syuzhet — never loaded
class(articleintel)
# NOTE(review): as.character() on a tm Corpus returns its internal
# representation rather than clean per-document text; something like
# sapply(articleintel, as.character) is usually intended — confirm.
a <- as.character(articleintel)
class(a)
mysentiment <- get_nrc_sentiment(a)
mysentiment
# Column totals: one score per emotion/sentiment category
sentimentscores <- data.frame(colSums(mysentiment[, ]))
sentimentscores
## TOPIC MODELING
## Latent Dirichlet Allocation (LDA) models are a widely used topic modeling technique
## Create a DTM.
## NOTE(review): `files` is never defined anywhere in this transcript —
## presumably a character vector of document texts read earlier in the
## original session; confirm before running. This block also repeats the
## corpus-cleaning pipeline from the text-mining section verbatim.
articleintel <- Corpus(VectorSource(files))
articleintel
articleintel <- tm_map(articleintel, removeNumbers)
articleintel <- tm_map(articleintel, removePunctuation)
stopwords()
articleintel <- tm_map(articleintel, removeWords, stopwords())
articleintel <- tm_map(articleintel, removeWords, c("its", "will", "has", "the"))
wordcloud(articleintel, random.order = FALSE)