

Another way of expressing the features (plots)

By: Dr. Eldirdiri Fadol Ibrahim Fadol

Scientific Research Center Founder & UP Initiative

Private Scientific Researcher

Scientific Data Analyst - Sudan

27th August 2020

In [41]: import pandas as pd


import matplotlib.pyplot as plt # For plotting graphs
from sklearn.metrics import mean_squared_error as MSE
from math import sqrt
from statsmodels.tsa.api import SimpleExpSmoothing, Holt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.stattools import adfuller # for Dickey Fuller test
from statsmodels.tsa.stattools import acf, pacf # for p,q in Arima Model
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
import numpy as np

# Load Dataset
train = pd.read_csv('Train.csv')
test = pd.read_csv('Test.csv')
data = test   # second name bound to the test set; reused in the last cell below
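A quick sanity check of the loaded frames is useful before splitting; the sketch below only assumes the 'Datetime' and 'Count' columns that the rest of the notebook relies on.

# Inspect shapes and the first rows (sketch; not part of the original run)
print(train.shape, test.shape)
print(train.head())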

In [42]: # Parse the Datetime column into pandas datetime objects


train['Datetime'] = pd.to_datetime(train.Datetime,format='%d-%m-%Y %H:%M')
test['Datetime'] = pd.to_datetime(test.Datetime,format='%d-%m-%Y %H:%M')

In [43]: valid = train.iloc[16056:18287, :]


train = train.iloc[0:16055, :]
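To confirm what the index-based split covers, the date range of each slice can be printed (a small sketch, not part of the original run):

print('train:', train.Datetime.min(), '->', train.Datetime.max(), '|', len(train), 'rows')
print('valid:', valid.Datetime.min(), '->', valid.Datetime.max(), '|', len(valid), 'rows')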

In [44]: # Visualize the train-validation data split


plt.figure(figsize=(40,20))
plt.plot(train.Datetime, train['Count'], label='train')
plt.plot(valid.Datetime, valid['Count'], label='validation')
plt.xlabel("Datetime")
plt.ylabel("Passenger count")
plt.legend(loc='best')
plt.show()

In [45]: # Naive method to predict time series


y_hat = valid.copy()
# Assume that all next values will be the same as last observed value
y_hat['Count'] = train['Count'][len(train)-1]
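The naive forecast repeats the last observed training value for every step of the validation horizon, i.e. y_hat(T+h) = y(T) for all h. An equivalent position-based lookup of that value is shown below as a sketch.

# Equivalent position-based lookup of the last observed value (sketch)
y_hat['Count'] = train['Count'].iloc[-1]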

In [46]: # Visualize Naive method predictions


plt.figure(figsize=(40,20))
plt.plot(train.Datetime, train['Count'], label='train')
plt.plot(valid.Datetime, valid['Count'], label='validation')
plt.plot(y_hat.Datetime, y_hat['Count'], label='Naive Forecast')
plt.xlabel('Datetime')
plt.ylabel('Passenger count')
plt.legend(loc='best')
plt.show()

In [47]: rmse = pd.DataFrame(columns=['Method', 'RMSE'])

In [48]: # Calculate RMSE for Naive method


rmse.loc[len(rmse)]="Naive", sqrt(MSE(valid.Count, y_hat.Count))
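The RMSE recorded above is just the square root of scikit-learn's mean squared error; computing it by hand makes the metric explicit (a sketch using the NumPy import from the first cell).

# RMSE by hand: square root of the mean squared difference (sketch)
manual_rmse = np.sqrt(np.mean((valid['Count'].values - y_hat['Count'].values) ** 2))
print(manual_rmse)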

In [49]: # Moving Average Method to predict time series

# last 10 days
y_hat['Count'] = train['Count'].rolling(10).mean().iloc[-1]
# Calculate RMSE for Moving average 10 days
rmse.loc[len(rmse)]="Moving Average 10D", sqrt(MSE(valid.Count, y_hat.Count))

# last 20 days
y_hat['Count'] = train['Count'].rolling(20).mean().iloc[-1]
# Calculate RMSE for Moving average 20 days
rmse.loc[len(rmse)]="Moving Average 20D", sqrt(MSE(valid.Count, y_hat.Count))

# last 50 days
y_hat['Count'] = train['Count'].rolling(50).mean().iloc[-1]
# Calculate RMSE for Moving average 50 days
rmse.loc[len(rmse)]="Moving Average 50D", sqrt(MSE(valid.Count, y_hat.Count))

# The RMSE for the 10-day window is lower than for the 20- and 50-day windows,
# i.e. the predictions get weaker as the moving-average window grows
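# To back up the comparison above, the moving-average rows recorded so far
# can be printed directly (a sketch, not part of the original run):
print(rmse[rmse['Method'].str.startswith('Moving Average')])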

# Visualize Moving Average predictions with window size of 10 days


plt.figure(figsize=(40,20))
plt.plot(train.Datetime, train['Count'], label='train')
plt.plot(valid.Datetime, valid['Count'], label='validation')
plt.plot(y_hat.Datetime, y_hat['Count'], label='Moving average 10 days forecast')
plt.xlabel('Datetime')
plt.ylabel('Passenger count')
plt.legend(loc='best')
plt.show()

# Simple Exponential Smoothing to predict time series

y_hat = valid.copy()
fit1 = SimpleExpSmoothing(train['Count']).fit(smoothing_level=0.1, optimized=False)
y_hat['Count'] = fit1.forecast(len(valid)+1)
# Calculate RMSE for SES 0.1
rmse.loc[len(rmse)]="Simple Exp Smoothing 0.1", sqrt(MSE(valid.Count, y_hat.Count))

fit1 = SimpleExpSmoothing(train['Count']).fit(smoothing_level=0.2, optimized=False)


y_hat['Count'] = fit1.forecast(len(valid)+1)
# Calculate RMSE for SES 0.2
rmse.loc[len(rmse)]="Simple Exp Smoothing 0.2", sqrt(MSE(valid.Count, y_hat.Count))

fit1 = SimpleExpSmoothing(train['Count']).fit(smoothing_level=0.6, optimized=False)


y_hat['Count'] = fit1.forecast(len(valid)+1)
# Calculate RMSE for SES 0.6
rmse.loc[len(rmse)]="Simple Exp Smoothing 0.6", sqrt(MSE(valid.Count, y_hat.Count))

# Visualize the Simple Exp Smoothing predictions (last fit above, smoothing level 0.6)


plt.figure(figsize=(40,20))
plt.plot(train.Datetime, train['Count'], label='train')
plt.plot(valid.Datetime, valid['Count'], label='validation')
plt.plot(y_hat.Datetime, y_hat['Count'], label='Simple Exp Smoothing forecast')
plt.xlabel('Datetime')
plt.ylabel('Passenger count')
plt.legend(loc='best')
plt.show()
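# For reference, SES keeps a single smoothed level:
#   y_hat(t+1) = alpha * y(t) + (1 - alpha) * y_hat(t)
# so a larger alpha weights recent observations more heavily.
# As a sketch (an assumed usage, not part of the original run), statsmodels
# can also estimate alpha itself:
fit_opt = SimpleExpSmoothing(train['Count']).fit(optimized=True)
print(fit_opt.params['smoothing_level'])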

# Holt's Linear Trend Model to predict time series

# Similar to SES but also takes trend into account

# Visualize the trend in data


sm.tsa.seasonal_decompose(np.asarray(train['Count']), freq=24).plot()  # 'freq' was renamed to 'period' in newer statsmodels, hence the warning below
result = sm.tsa.stattools.adfuller(train['Count'])
plt.show()
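# The Dickey-Fuller result computed above is never printed; as a sketch,
# report its test statistic and p-value to judge stationarity:
print('ADF statistic: %.4f, p-value: %.4f' % (result[0], result[1]))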

# The decomposition shows an increasing trend,
# so Holt's linear trend model should perform better than the methods above

fit1 = Holt(train['Count']).fit(smoothing_level = 0.1,smoothing_slope = 0.0001)


y_hat['Count'] = fit1.forecast(len(valid) + 1)

# Calculate RMSE for Holt's Linear Trending Model


rmse.loc[len(rmse)]="Holt's Linear Trend 0.0001", sqrt(MSE(valid.Count, y_hat.Count))

# Visualize Holt's predictions


plt.figure(figsize=(40,20))
plt.plot(train.Datetime, train['Count'], label='train')
plt.plot(valid.Datetime, valid['Count'], label='validation')
plt.plot(y_hat.Datetime, y_hat['Count'], label="Holt's Linear Trend Forecast")
plt.xlabel('Datetime')
plt.ylabel('Passenger count')
plt.legend(loc='best')
plt.show()
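# Holt's method extends SES with a trend term; the h-step-ahead forecast is
#   y_hat(T+h) = level(T) + h * trend(T)
# so the rising trend is extrapolated linearly across the validation window.
# A final look at every method recorded so far (a sketch, not part of the
# original run):
print(rmse.sort_values('RMSE'))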

<ipython-input-49-e369170aa96a>:67: FutureWarning: the 'freq' keyword is deprecated, use 'period' instead
  sm.tsa.seasonal_decompose(np.asarray(train['Count']), freq=24).plot()

C:\Users\compu lab\anaconda3\lib\site-packages\statsmodels\tsa\holtwinters.py:731: RuntimeWarning: invalid value encountered in greater_equal
  loc = initial_p >= ub

In [50]: valid = data.iloc[16056:18287, :]


data = data.iloc[0:16055, :]

