# Final R Code
# Install tidyverse only when it is missing, then attach it.
# (`packages()` is not an R function; `install.packages()` was intended.)
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
# lubridate supplies mdy_hm(), year(), month(), day(), hour() and minute().
# tidyverse releases before 2.0.0 do not attach it, so load it explicitly.
library(lubridate)

# Import file
df <- read_csv("/Users/tabish/Downloads/Analyst Project/flight_data_SAN.csv")

# Convert the timestamp columns from character to date-time. This is required
# to extract date, year, month, hour etc.
df$scheduled_arrival_dttm <- mdy_hm(df$scheduled_arrival_dttm)
df$scheduled_departure_dttm <- mdy_hm(df$scheduled_departure_dttm)
df$actual_departure_dttm <- mdy_hm(df$actual_departure_dttm)
df$actual_arrival_dttm <- mdy_hm(df$actual_arrival_dttm)

# Extract year, month, day, hour and minute from the actual departure timestamp
df$act_dep_year <- year(df$actual_departure_dttm)
df$act_dep_month <- month(df$actual_departure_dttm)
df$act_dep_day <- day(df$actual_departure_dttm)
df$act_dep_hour <- hour(df$actual_departure_dttm)
df$act_dep_min <- minute(df$actual_departure_dttm)

# Convert NAs in taxiout to 0 (the count of NAs is reported first)
sum(is.na(df$taxiout))
index <- is.na(df$taxiout)
df$taxiout[index] <- 0
# Install the required packages only when they are missing, then attach them.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
# lubridate supplies mdy_hm(); tidyverse releases before 2.0.0 do not attach it.
library(lubridate)
if (!requireNamespace("fpp", quietly = TRUE)) {
  install.packages("fpp")
}
library(fpp)

# Import file
df <- read_csv("/Users/tabish/Downloads/Analyst Project/flight_data_SAN.csv")

# Convert the timestamp columns from character to date-time. This is required
# to extract date, year, month, hour etc.
# NOTE(review): `df_new` is not created anywhere in the visible script (the
# import above assigns `df`) -- confirm where `df_new` is derived before running.
df_new$scheduled_arrival_dttm <- mdy_hm(df_new$scheduled_arrival_dttm)
df_new$scheduled_departure_dttm <- mdy_hm(df_new$scheduled_departure_dttm)
df_new$actual_departure_dttm <- mdy_hm(df_new$actual_departure_dttm)
df_new$actual_arrival_dttm <- mdy_hm(df_new$actual_arrival_dttm)
# First difference of the series, plotted for inspection
tsdatadiff <- diff(tsdata)
plot(tsdatadiff)

# ETS: fit the model, print its summary and forecast 3 periods ahead
tsdataets <- ets(tsdata)
summary(tsdataets)
predictets <- forecast(tsdataets, h = 3)
plot(predictets)

# Observed series (blue) with ETS fitted values (red) and forecast mean (green)
plot(tsdata, col = "blue")
lines(predictets$fitted, col = "red")
lines(predictets$mean, col = "green")
checkresiduals(predictets)

# ARIMA: ACF/PACF of the raw values up to lag 30, then an automatically
# selected model (trace = TRUE) and a 3-period forecast
acf(coredata(tsdata), lag.max = 30)
pacf(coredata(tsdata), lag.max = 30)
dataarima <- auto.arima(tsdata, trace = TRUE)
predictarima <- forecast(dataarima, h = 3)
plot(predictarima)
# Average taxi-out time for each departure date (one summary row per day).
# The pipe has to end a line rather than start one: R treats a line that is
# already a complete expression as finished, so a leading `%>%` fails to parse.
mydataday_summary <- mydataday %>%
  group_by(Day = df_new.act_dep_date) %>%
  summarize(TaxiOutTime = mean(df_new.taxiout))
# First difference of the series, plotted for inspection
tsdatadiff <- diff(tsdata)
plot(tsdatadiff)

# ETS: fit the model, print its summary and forecast 90 periods ahead
tsdataets <- ets(tsdata)
summary(tsdataets)
predictets <- forecast(tsdataets, h = 90)
plot(predictets)

# Observed series (blue) with ETS fitted values (red) and forecast mean (black)
plot(tsdata, col = "blue")
lines(predictets$fitted, col = "red")
lines(predictets$mean, col = "black")
checkresiduals(predictets)
summary(predictets)

# ARIMA: ACF/PACF up to lag 30 for both the raw and the differenced series
acf(coredata(tsdata), lag.max = 30)
pacf(coredata(tsdata), lag.max = 30)
acf(coredata(tsdatadiff), lag.max = 30)
pacf(coredata(tsdatadiff), lag.max = 30)

# Automatically selected seasonal model with D = 1, then a 90-period forecast
dataarima <- auto.arima(tsdata, D = 1, trace = TRUE, seasonal = TRUE)
predictarima <- forecast(dataarima, h = 90)
plot(predictarima)

# Observed series (black) with ARIMA fitted values (red) and forecast mean (blue)
plot(tsdata, col = "black")
lines(predictarima$fitted, col = "red")
lines(predictarima$mean, col = "blue")
# plot(predictarima$fitted, col = "red")
# plot(tsdata)
# Two-column frame: departure hour and taxi-out time taken from df_new.
# NOTE(review): the column is spelled `act_dephour` here, while the extraction
# step elsewhere in this script creates `act_dep_hour` -- confirm the name
# matches what df_new actually contains.
mydatahour <- data.frame(df_new$act_dephour, df_new$taxiout)

# Convert the hour column to character. Per the original author's note, this is
# required to check for NAs in the column and convert them to 0; NAs are not
# replaced with 0 while the column is of Date type.
mydatahour$df_new.act_dephour <- as.character(mydatahour$df_new.act_dephour)
# ADF test on the series
adf.test(tsdata)
# Author's note: stationary series

# First difference of the series, plotted for inspection
tsdatadiff <- diff(tsdata)
plot(tsdatadiff)

# ETS: fit the model, print its summary and forecast 1530 periods ahead
tsdataets <- ets(tsdata)
summary(tsdataets)
predictets <- forecast(tsdataets, h = 1530)
plot(predictets)

# Observed series (blue) with ETS fitted values (red) and forecast mean (black)
plot(tsdata, col = "blue")
lines(predictets$fitted, col = "red")
lines(predictets$mean, col = "black")
checkresiduals(predictets)
summary(predictets)

# ARIMA: ACF/PACF of the raw values up to lag 30, then an automatically
# selected seasonal model and a 1530-period forecast
acf(coredata(tsdata), lag.max = 30)
pacf(coredata(tsdata), lag.max = 30)
dataarima <- auto.arima(tsdata, trace = TRUE, seasonal = TRUE)
predictarima <- forecast(dataarima, h = 1530)
plot(predictarima)

# Observed series (blue) with ARIMA fitted values (red) and forecast mean (green)
plot(tsdata, col = "blue")
lines(predictarima$fitted, col = "red")
lines(predictarima$mean, col = "green")
checkresiduals(predictarima)
summary(predictarima)