HRM

grades=c("O","B+","A","A+","C","B","A","B+","A","O+")

grades
marks=c(86,78,45,90,59,78,87,44,98,60)
grades
rno=c(1:10)
rno
marks=marks+2
marks
slab=marks/10
slab
grades[3]
slab[c(3,5)]
marks[8:10]
gf=as.factor(grades)
gf
as.numeric(gf)
rno=c(rno,11)
rno
marks=c(marks,NA)
marks
grades=c(grades,NA)
grades
anyNA(marks)
mean(marks)
mean(marks,na.rm = TRUE)
d=data.frame(rno,grades,marks)
d
head(d)
tail(d)
grades[5]

x=5
x
x<-5
x
x=y=5
x
y=5L
class(y)
class(x)
x+y
x-y
x==y
x=c(1:10)
x
y=c(-5:4)
y
z=c("hockey","football","basketball","cricket","tennis","hockey","football","basketball","cricket","tennis")
z
unique=as.factor(z)
unique
nchar(z)
l=data.frame(x,y,z)
l
class(l)
mtcars
x=c(1:5)
x
y=c(-2:2)
y
data.frame(x,y)
x+y
x-y
x*y
x/y
x^2
sqrt(x)
sqrt(y)
date1=as.Date("2019-11-15")
date1
dim(l)
str(l)
x=c(1,2,NA,3,4,NA)
mean(x)
mean(x,na.rm = TRUE)
TRUE*10
FALSE*5

mtcars
dim(mtcars)
row.names(mtcars)
colnames(mtcars)
mtcars[,4]
mtcars[2,]
mtcars[8,4]
head(mtcars)
tail(mtcars)
mtcars[3,4:5]
mean(mtcars$mpg)
summary(mtcars)
View(mtcars)
mtcars
library(dplyr)   # %>% and the verbs below (select, filter, arrange, mutate) come from dplyr
mtcars %>% select(disp,mpg,cyl) %>% filter(mpg<22,cyl==4) %>% arrange(disp) %>% mutate(mean(disp))
mtcars
View(mtcars)
data(mtcars)

mtcars
sum(mtcars$mpg>=21)
sum(mtcars$mpg==21)
mtcars %>% select(mpg,cyl) %>% filter(mpg>=21)
library(dplyr)
mtcars %>% select(am,mpg) %>% filter(mpg>24,am==1)

airquality
View(airquality)
anyNA(airquality)
mean(airquality$Ozone)
mean(airquality$Ozone,na.rm = TRUE)
median(airquality$Solar.R,na.rm = TRUE)
airquality[4,4]
airquality %>% select(Temp,Day) %>% filter(Day==4)
airquality %>% select(Wind,Day) %>% filter(Day==11)
airquality %>% select(Temp,Day,Solar.R) %>% filter(Day==4,Solar.R>=313)
airquality %>% select(Temp,Day,Month) %>% filter(Day==4,Month==5)

# arrange value of ozone in descending order and give details of temperature in 6th month
airquality %>% select(Temp,Month,Ozone) %>% filter(Month==6) %>% arrange(desc(Ozone))

# give value of ozone on the day when wind is 8 and temperature is 72


airquality %>% select(Ozone,Day,Wind,Temp) %>% filter(Wind==8,Temp==72)

# mutate is for adding a new variable


airquality %>% mutate(Ratio=Temp/Wind)
air=airquality
air
anyNA(air)
head(air)

# replace missing (NA) values


library(tidyr)   # provides replace_na()
replace_na(air$Ozone,replace = mean(air$Ozone,na.rm = TRUE))
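# A minimal sketch (assuming dplyr and tidyr are loaded): the call above only
# prints the imputed vector, so assign the result back into the data frame,
# e.g. with mutate(), if you want to keep it.
air = air %>% mutate(Ozone = replace_na(Ozone, mean(Ozone, na.rm = TRUE)))
anyNA(air$Ozone)   # should now be FALSE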

library(hflights)   # Houston flights dataset
hflights
hf=hflights

# replace, group_by, summarise


View(hf)
k=hf %>% group_by(UniqueCarrier) %>% filter(Distance>3000)
k
View(k)
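# The comment above also mentions summarise; a minimal sketch (assuming dplyr
# and the hflights data are loaded): average arrival delay per carrier.
hf %>% group_by(UniqueCarrier) %>% summarise(mean_arr_delay = mean(ArrDelay, na.rm = TRUE))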

# where taxiing took longer than the flight itself


View(hf %>% select(TaxiIn,TaxiOut,AirTime) %>% filter(TaxiIn+TaxiOut>AirTime))

# all flights that departed late but arrived ahead of schedule


View(hf %>% group_by(UniqueCarrier) %>% filter(DepDelay>0,ArrDelay<0))

# all flights that departed before 5 PM (1700 hrs) and arrived after 10 PM (2200 hrs)


View(hf %>% filter(DepTime<1700,ArrTime>2200))

# all cancelled flights on the weekend (DayOfWeek 6 = Saturday, 7 = Sunday)


View(hf %>% filter(Cancelled == 1, DayOfWeek %in% c(6,7)))

# data frame
player=c("sachin","sehwag","virat","dhoni","yuvraj","raina","bumrah","shami","ashwin","bhuvneshwar")
player
score=c(99,209,183,156,143,56,33,10,5,16)
score
matchesplayed=c(234,209,123,235,123,108,45,56,34,123)
matchesplayed
dismissals=c(134,180,98,189,100,98,40,NA,32,NA)
dismissals
tournament=c("ICC WC","ICC WC","ICC WC","ICC WC","ICC WC","ICC WC","ICC WC","ICC WC","ICC WC","ICC WC")
tournament
l=data.frame(player,score,matchesplayed,dismissals,tournament)
l

# value in 3rd column and 5th row


l[5,3]
# no. of rows and column
dim(l)
# number of rows
nrow(l)

# first 7 rows using the head command


head(l,7)

# apply select and filter player having more than 150 runs
l %>% select(player,score) %>% filter(score>150)

# info in 4th column


l[,4]

# replace NA with median


g=replace_na(dismissals,replace = median(dismissals,na.rm = TRUE))
g

# DISTRIBUTION DIAGRAMS

library(ggplot2)   # provides the diamonds dataset and ggplot()
diamonds
# create a histogram with carat on the x-axis (hist() plots frequency on the y-axis; xlab/ylab only set the labels)
hist(diamonds$carat,xlab = "carat",ylab = "price",main = "chandan")

# scatterplot
plot(diamonds$carat,diamonds$price)

# boxplot
boxplot(diamonds$carat,diamonds$price)
summary(diamonds$carat)

# ggplot2
ggplot(diamonds,aes(x=carat))+geom_histogram()+facet_wrap(~color)

# mtcars: prepare histograms for automatic and manual transmissions (am), taking mpg as xlab and cyl as ylab
mtcars
hist(mtcars$mpg,xlab = "mpg",ylab = "cyl",main = "chandan")
ggplot(mtcars,aes(x=mpg))+geom_histogram()+facet_wrap(~am)

# for two variables


ggplot(diamonds,aes(x=carat))+geom_histogram()+facet_grid(cut~color)
ggplot(mtcars,aes(x=mpg))+geom_histogram()+facet_grid(cyl~am)

# binwidth sets the histogram bin width, geom_point is for scatter points, alpha is for fading of dots,
# geom_violin is for a violin-shaped graph
iris
head(iris)
ggplot(iris,aes(x=Sepal.Length))+geom_histogram(binwidth = .5, fill="blue")
ggplot(iris,aes(x=Sepal.Length, y=Sepal.Width, color=Species))+geom_point(alpha=1)+geom_boxplot()+
  geom_violin(alpha=0.2)+facet_wrap(~Species)+ggtitle("Flower Data")

# geom_density draws a DENSITY DIAGRAM


diamonds
ggplot(diamonds,aes(x=carat))+geom_density(aes(fill="red"))
ggplot(diamonds,aes(x=carat, y=price))+geom_smooth()
# REGRESSION, lm=Linear Model
z=lm(price~carat, data=diamonds)
z
summary(z)
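# A minimal sketch (carat values here are illustrative): predict() uses the
# fitted linear model to estimate price for new carat values.
predict(z, newdata = data.frame(carat = c(0.5, 1, 2)))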

# LINE DIAGRAM (DEVIATION)


economics
ggplot(economics,aes(x=date, y=pop, color="red"))+geom_line()

# PIE CHART: geom_bar alone gives a bar chart; adding coord_polar turns it into a pie chart
G1=LETTERS[1:5]
G1
V=c(33,45,65,43,87)
V
D=data.frame(G1,V)
D
ggplot(D, aes(x="", y=V, fill=G1))+geom_bar(stat = "identity", width = 1)+coord_polar("y", start = 0)+ggtitle("PIECHART")

mtcars

# CORRELATION
cor(mtcars)
View(cor(mtcars))

# ROUNDING OFF DECIMAL VALUES


q=round(cor(mtcars),2)

ggcorrplot(q,method = "circle")
ggcorrplot(q,method = "square")
ggcorrplot(q,method = "square", type = "lower")
ggcorrplot(q,method = "square", type = "upper")
library(corrplot)   # for corrplot() and corrplot.mixed()
corrplot.mixed(q,upper = "square", lower = "number")

diamonds
d=diamonds
z=d[,5:10]
b=cor(z)
b
corrplot(b,method = "square", type = "lower")
corrplot.mixed(b,upper = "ellipse", lower = "number")

# FORECAST

austres
dim(austres)

# IMPORTING DATASET
read.csv(file.choose(), header = TRUE, stringsAsFactors = F)

rainfall=c(234,678,234,243,567,345,242,890,356,234,567,234)
rainfall2=c(789,678,453,346,987,341,890,427,341,543,987,341)
dim(rainfall)
str(rainfall)
combinedrainfall=matrix(c(rainfall,rainfall2),nrow=12)
combinedrainfall

#command for timeseries


# general syntax: timeseries.object.name = ts(data, start, end, frequency)
combinedrainfall.timeseries=ts(combinedrainfall,start = c(2012,1),frequency = 12)
combinedrainfall.timeseries
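# A small sketch: plot() on a multi-column time series draws one panel per
# rainfall series, a quick visual check of the object just created.
plot(combinedrainfall.timeseries)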
AirPassengers
Airp=AirPassengers
Airp
Air.timeseries=ts(Airp,start = c(1949,1),end = c(1960,1),frequency = 12)
Air.timeseries
start(Air.timeseries)
end(Air.timeseries)
frequency(AirPassengers)

library(forecast)   # for auto.arima(), forecast() and accuracy()
arimafit=auto.arima(Airp)
arimafit
predictingfuture=forecast(arimafit, h=60)
plot(predictingfuture)

predictingfuture2=predict(arimafit, n.ahead = 60)


predictingfuture2

USAccDeaths
usd=USAccDeaths
usd
start(usd)
end(usd)
frequency(usd)
US=ts(usd, start = c(1973,1), end = c(1978,12), frequency = 12)
US
usdeath=auto.arima(usd)
usdeath
forcast=forecast(usdeath, h=48)
forcast
plot(forcast)
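# A minimal sketch: accuracy() from the forecast package reports training-set
# error measures (ME, RMSE, MAE, ...) for the fitted ARIMA model.
accuracy(usdeath)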

# SLICE COMMAND
PlantGrowth
P=PlantGrowth
P
P %>% slice(c(1:5),8,15:20)

# for making the data stationary: log() stabilises the variance, diff() removes the trend (time dependence)
Airp
plot(Airp)
plot(log(Airp))
plot(diff(log(Airp)))
fit=auto.arima(log(Airp), approximation = F, trace = F)
fit
prediction=predict(fit, n.ahead = 36)
prediction

# using antilog
newpred=round(exp(prediction$pred),0)   # exp() is the exact antilog of the natural log
newpred
ts.plot(Airp, newpred, col=c("blue","green"))

# linear model

USArrests
str(USArrests)
ggplot(USArrests, aes(x=Rape, y=Assault))+geom_smooth(method = "lm")+geom_point()
lm(Rape~Murder+Assault+UrbanPop, data = USArrests)

# alternative to the command above: the formula Rape ~ . uses all remaining variables as predictors


lm(Rape~.,data = USArrests)

## step 1 : data pre-processing


diabetes=read.csv(file.choose())
diabetes
str(diabetes)
dim(diabetes)
set.seed(12345)

## step 2 : data preparation


anyNA(diabetes)

## step 3 : data partitioning


library(caret)   # for createDataPartition(), train(), confusionMatrix()
intrainnew=createDataPartition(diabetes$Outcome, p=0.75, list = F)
training=diabetes[intrainnew,]
training
testing=diabetes[-intrainnew,]
testing
dim(training)
dim(testing)
## step 4 : model building
model=train(as.factor(Outcome)~.,data = training, method="glm")
model

## step 5 : data prediction


Pred=predict(model, newdata=testing)
Pred
confusionMatrix(Pred, as.factor(testing$Outcome) )
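# A minimal sketch (same caret workflow, shown here only as an illustration):
# resampling via 5-fold cross-validation can be requested through trainControl().
ctrl = trainControl(method = "cv", number = 5)
model_cv = train(as.factor(Outcome)~., data = training, method = "glm", trControl = ctrl)
model_cv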

##################################################################################################
#######################################################################################

library(kernlab);data("spam")
spam
str(spam)
dim(spam)
set.seed(23456)
intrain=createDataPartition(y=spam$type, p=0.75, list = F)
intrain
training1=spam[intrain,]
training1
testing1=spam[-intrain,]
testing1
dim(testing1)
dim(training1)
model1=train(type~.,data = training1, method="knn")
model1
pred=predict(model1, newdata=testing1)
pred
confusionMatrix(pred, testing1$type)

# UNSUPERVISED LEARNING

ir=iris
ir
ir=iris[,-5]
ir
ggplot(iris,aes(x=Sepal.Length, y=Petal.Length, color=Species))+geom_point(alpha=1)

## distribute into 3 clusters


New=kmeans(dist(ir),3)
New
New$cluster

## append the cluster assignments as a new variable "clusters"


iris1=ir %>% mutate(clusters=New$cluster)
iris1
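# A minimal sketch (not part of the original exercise, and applied to the raw
# measurements rather than the distance matrix): the "elbow" in the total
# within-cluster sum of squares helps justify choosing 3 clusters.
wss = sapply(1:10, function(k) kmeans(ir, centers = k, nstart = 10)$tot.withinss)
plot(1:10, wss, type = "b", xlab = "number of clusters k", ylab = "total within-cluster SS")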

## hierarchical clustering ## dendrogram plot


h1=hclust(dist(iris1), method = "ward.D")
plot(h1)

## using method "complete


h1=hclust(dist(iris1), method = "complete")
plot(h1)
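# A minimal sketch: cutree() cuts the dendrogram into a chosen number of
# groups, giving cluster labels comparable to the kmeans result above.
cutree(h1, k = 3)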

intel=readLines(file.choose())
intel

library(tm)   # for Corpus(), tm_map(), stopwords(), TermDocumentMatrix()
articleintel=Corpus(VectorSource(intel))
articleintel
articleintel=tm_map(articleintel,removeNumbers)
articleintel=tm_map(articleintel,removePunctuation)
stopwords()
articleintel=tm_map(articleintel,removeWords,stopwords())
articleintel=tm_map(articleintel,removeWords,c("its","will","has","the"))
library(wordcloud)   # for wordcloud()
wordcloud(articleintel,random.order = F)

## create TDM
tdm=TermDocumentMatrix(articleintel)
tdm
class(tdm)
tdm=as.matrix(tdm)
tdm
View(tdm)
termfreq=rowSums(as.matrix(tdm))
termfreq
View(termfreq)

## subsetting TDM
termfreqsubset=subset(termfreq, termfreq>=2)
termfreqsubset
View(termfreqsubset)

## creating a dataframe
tdmf=data.frame(term=names(termfreqsubset),freq=termfreqsubset)
tdmf
row.names(tdmf)=NULL
View(tdmf)
tdmplot=ggplot(tdmf, aes(x=term, y=freq))+geom_bar(stat = "identity")+xlab("Terms")+ylab("Count")+coord_flip()+
  theme(axis.text = element_text(size = 6))
tdmplot

## SENTIMENT ANALYSIS
class(articleintel)
a=as.character(articleintel)
class(a)
library(syuzhet)   # for get_nrc_sentiment()
mysentiment=get_nrc_sentiment(a)
mysentiment
sentimentscores=data.frame(colSums(mysentiment[,]))
sentimentscores

## giving name to score column


names(sentimentscores)="Score"
sentimentscores

## adding the row names as a "sentiment" column


sentimentscores= cbind("sentiment" = rownames(sentimentscores), sentimentscores)
sentimentscores
## removing row names
rownames(sentimentscores)=NULL
sentimentscores

## plotting the sentiment scores


ggplot(sentimentscores, aes(x = sentiment, y = Score))+geom_bar(aes(fill=sentiment), stat = "identity")+
  theme(legend.position = "none") + xlab("sentiment") + ylab("Score") + ggtitle("Total Sentiment Score")

## TOPIC MODELING
## Latent Dirichlet Allocation (LDA) models are a widely used topic modeling technique
## create DTM

intel = list.files(getwd(), pattern = "\\.txt$")   # regular expression matching files ending in .txt


intel
files=lapply(intel,readLines)
files

articleintel=Corpus(VectorSource(files))
articleintel
articleintel=tm_map(articleintel,removeNumbers)
articleintel=tm_map(articleintel,removePunctuation)
stopwords()
articleintel=tm_map(articleintel,removeWords,stopwords())
articleintel=tm_map(articleintel,removeWords,c("its","will","has","the"))
wordcloud(articleintel,random.order = F)

articledtm = DocumentTermMatrix(articleintel, control = list(minWordLength = 3))


articledtm
k=3
SEED = 1234
library(topicmodels)   # for LDA(), topics(), terms()
article.lda=LDA(articledtm, k, method = "Gibbs", control = list(seed = SEED))
article.lda
lda.topics = as.matrix(topics(article.lda))
lda.topics
lda.terms=terms(article.lda)
lda.terms
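# A minimal sketch: terms() can also return the top n terms per topic, which
# makes the three topics easier to interpret.
terms(article.lda, 10)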
