Download as pdf or txt
Download as pdf or txt
You are on page 1of 3

R Notebook

BIS 581 #load libraries


# Attach the packages the rest of this notebook relies on:
#   dplyr     - `%>%`, inner_join(), group_by(), summarise(), count(), ...
#   lubridate - mdy_hm(), ymd_hms()
#   ggplot2   - ggplot(), geom_bar(), theme_bw()
library(dplyr)
library(lubridate)
library(ggplot2)

# Read the two input files from the working directory. Text columns are kept
# as character vectors (not factors) so they can be parsed and compared later.
VDI <- read.csv("vdi_serverlogs.csv", header = TRUE, stringsAsFactors = FALSE)

apps <- read.csv("vdi_statsapps.csv", header = TRUE, stringsAsFactors = FALSE)

Join the two data sets together.


# Coerce the join key in `apps` to integer. Values that cannot be read as an
# integer become NA, which is what triggers the coercion warning below.
# NOTE(review): presumably VDI$VDI_ID is already integer - confirm.
apps$VDI_ID <- as.integer(apps$VDI_ID)

## Warning: NAs introduced by coercion

# Join on an explicitly named key instead of relying on the implicit
# natural join. `na_matches = "never"` stops the NA keys produced by the
# coercion above from being paired with any NA key in `VDI`.
usage <- VDI %>%
  inner_join(apps, by = "VDI_ID", na_matches = "never")

## Joining, by = "VDI_ID"

#clean up

Answer the questions handed out on Blackboard, then create a presentation of your findings.
The audience for the presentation is the CIO, the assistant CIO, and some of the tech managers,
along with a representative from purchasing. Your R notebook will be the appendix that shows
how you got the answers you did. NOTE: graduate students have extra work for this; see
Blackboard for details.
How many users were on the system total?
# Number of distinct user IDs present in the joined data.
n_distinct(usage$userid)

## [1] 25963

What is the average number of users per day?


# Parse the month/day/year hour:minute stamps and keep only the calendar
# date, overwriting the original log-on / log-out columns.
usage[["logon_DTS"]] <- as.Date(mdy_hm(usage[["logon_DTS"]]))
usage[["logout_DTS"]] <- as.Date(mdy_hm(usage[["logout_DTS"]]))

# One row per log-on date, holding the number of distinct users seen that day.
Data <- usage %>%
  group_by(logon_DTS) %>%
  summarise(frequency = n_distinct(userid))

## `summarise()` ungrouping output (override with `.groups` argument)

# Average of the daily distinct-user counts.
mean(Data[["frequency"]])

This study source was downloaded by 100000831408399 from CourseHero.com on 02-18-2022 16:27:24 GMT -06:00

https://www.coursehero.com/file/107423326/BIS-581-Server-logsdocx/
## [1] 327.7106

What is the highest number of users per day?


# Largest daily distinct-user count.
max(Data[["frequency"]])

## [1] 1395

Group-specific details are shown below: the top 3 users by number of times logged in from
off-site, and the top 3 applications by length of time run.
# Treat the "1900-01-01 00:00:00" placeholder as missing in both timestamp
# columns (stop first, then start) before parsing them.
usage[c("stop", "start")] <- lapply(
  usage[c("stop", "start")],
  na_if,
  y = "1900-01-01 00:00:00"
)

# Parse the year-month-day hour:minute:second strings into date-times.
usage[["start"]] <- ymd_hms(usage[["start"]])

## Warning: 10 failed to parse.

usage[["stop"]] <- ymd_hms(usage[["stop"]])

## Warning: 1 failed to parse.

# Elapsed time between start and stop, in minutes.
usage[["time"]] <- difftime(usage[["stop"]], usage[["start"]], units = "mins")

# Empty remote addresses are labelled "unknown".
usage$remote_ip <- replace(usage$remote_ip, usage$remote_ip == "", "unknown")

# First seven characters of the address, used as a prefix for classification.
usage$sample.ip <- ifelse(
  usage$remote_ip == "unknown",
  "unknown",
  substr(usage$remote_ip, start = 1, stop = 7)
)

# Prefix "141.209" is classed as onsite; any other known prefix is offsite.
usage$site <- ifelse(
  usage$sample.ip == "unknown",
  "unknown",
  ifelse(usage$sample.ip == "141.209", "onsite", "offsite")
)

# Row positions with a known prefix other than "141.209".
pos <- which(
  !(usage$sample.ip == "unknown" | usage$sample.ip == "141.209")
)

# Subset of those rows, keeping only the columns used for the off-site
# summaries.
Data_4 <- with(
  usage[pos, ],
  data.frame(
    userid = userid,
    remote_ip = remote_ip,
    app_name = app_name,
    time = time,
    site = site
  )
)

#top 3 users by number of times logged in from off-site


# Count the rows of the off-site subset per user and show the three users
# with the most rows. `arrange()` has no `sort` argument (a named `sort = TRUE`
# is silently taken as an extra constant sort expression), so only the real
# ordering expression is kept.
Data_4 %>%
  group_by(userid) %>%
  count() %>%
  arrange(desc(n)) %>%
  head(3)

## # A tibble: 3 x 2
## # Groups: userid [3]

This study source was downloaded by 100000831408399 from CourseHero.com on 02-18-2022 16:27:24 GMT -06:00

https://www.coursehero.com/file/107423326/BIS-581-Server-logsdocx/
## userid n
## <chr> <int>
## 1 userid580 4326
## 2 userid11633 3647
## 3 userid1152 3506

# Bar chart with one bar per value of `site`; geom_bar() counts the rows of
# `usage` in each category. The subtitle is a single-line string, with no line
# break embedded in the middle of the sentence.
ggplot(usage, aes(x = site, fill = site)) +
  geom_bar() +
  theme_bw() +
  labs(
    title = "CLASSIFICATION OF USERID",
    subtitle = "Number of offsite, onsite and unknown users."
  )

#top 3 applications by length of time run


# Rank applications (not individual rows) by run time: add up `time` per
# app_name, ignoring rows whose start/stop could not be parsed, then keep the
# three largest totals. Sorting the raw rows instead would let the same
# application appear more than once in the "top 3".
# NOTE(review): any printed table captured from the earlier per-row ranking
# must be refreshed by re-rendering the notebook.
usage %>%
  group_by(app_name) %>%
  summarise(total_time = sum(time, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(total_time)) %>%
  head(3)

## # A tibble: 3 x 3
## # Groups: app_name [2]
## userid app_name time
## <chr> <chr> <drtn>
## 1 userid25641 sas 229156 mins
## 2 userid12283 sas 173142 mins
## 3 userid12283 iexplore 171659 mins

This study source was downloaded by 100000831408399 from CourseHero.com on 02-18-2022 16:27:24 GMT -06:00

https://www.coursehero.com/file/107423326/BIS-581-Server-logsdocx/
Powered by TCPDF (www.tcpdf.org)

You might also like