Download as pdf or txt
Download as pdf or txt
You are on page 1 of 10

Auto Tuning multiple Timeseries SARIMAX Model — With ... https://nandakishorej8.medium.com/auto-tuning-multiple-tim...

1 of 10 11/16/2021, 9:22 AM
Auto Tuning multiple Timeseries SARIMAX Model — With ... https://nandakishorej8.medium.com/auto-tuning-multiple-tim...

2 of 10 11/16/2021, 9:22 AM
Auto Tuning multiple Timeseries SARIMAX Model — With ... https://nandakishorej8.medium.com/auto-tuning-multiple-tim...

3 of 10 11/16/2021, 9:22 AM
Auto Tuning multiple Timeseries SARIMAX Model — With ... https://nandakishorej8.medium.com/auto-tuning-multiple-tim...

# Parse the YYYYMM month key into timestamps and make it the index.
# (The assignment must stay on one logical line; splitting it after the
# "=" is a syntax error.)
df['monthyyyymm_no'] = pd.to_datetime(df['monthyyyymm_no'], format="%Y%m")
df = df.set_index('monthyyyymm_no', drop=True)

# Temporarily remove the area-code column so the float cast below applies
# only to the remaining columns, then put it back as the first column.
col_name = "areacode"
first_col = df.pop(col_name)
df = df.astype(float)
df.insert(0, col_name, first_col)

# Replace missing values with 0.
df = df.fillna(0)

# Candidate model configurations, keyed by their position (0..15).
# Each value is a two-element list: the first tuple is used as the SARIMAX
# `order`, the second as its `seasonal_order` (all with a period of 12).
params = dict(enumerate([
    [(1, 1, 1), (1, 1, 1, 12)],
    [(1, 1, 0), (1, 1, 1, 12)],
    [(1, 1, 0), (1, 1, 0, 12)],
    [(1, 1, 0), (0, 1, 0, 12)],
    [(1, 1, 1), (1, 1, 0, 12)],
    [(1, 1, 1), (2, 1, 0, 12)],
    [(1, 1, 2), (1, 1, 2, 12)],
    [(1, 1, 1), (1, 1, 2, 12)],
    [(1, 1, 1), (2, 1, 2, 12)],
    [(1, 1, 0), (1, 1, 2, 12)],
    [(2, 1, 1), (2, 1, 1, 12)],
    [(2, 1, 1), (1, 1, 1, 12)],
    [(2, 1, 1), (1, 1, 0, 12)],
    [(1, 1, 2), (2, 1, 2, 12)],
    [(1, 1, 2), (1, 1, 0, 12)],
    [(0, 1, 1), (1, 1, 1, 12)],
]))

4 of 10 11/16/2021, 9:22 AM
Auto Tuning multiple Timeseries SARIMAX Model — With ... https://nandakishorej8.medium.com/auto-tuning-multiple-tim...

# Exhaustive search space: p and q each take 0, 1, 2 and d takes 0, 1.
p = range(3)
q = p
d = range(2)

# Every (p, d, q) triple, and the same triples extended with a fixed
# fourth element of 12.
pdq = [*itertools.product(p, d, q)]
pdqs = [(first, second, third, 12)
        for first, second, third in itertools.product(p, d, q)]

# Product categories to model; each one is read as a column of `df`.
columns = [
    'Cosmatics',
    'Processed_Food',
    'Furniture',
    'Books',
]

# Distinct area codes found in the data.
area = list(df['areacode'].unique())

# Number of future periods to forecast for every series.
steps = 16

#function returns dataframe with best parameters


def sarimax_gridsearch(ts, area, col, params,
                       maxiter=5000):
    '''
    Fit one SARIMAX model per candidate configuration and return the
    configuration with the lowest AIC.

    Input:
        ts      : your time series data (passed to SARIMAX unchanged)
        area    : areacode of the current model; only used to label the
                  printed lines and the result row
        col     : product category on which the model is being built;
                  only used to label the printed lines and the result row
        params  : candidate configurations, either a dict whose values
                  are [order, seasonal_order] pairs (keys are ignored) or
                  a plain sequence of such pairs. order is a 3-tuple and
                  seasonal_order a 4-tuple.
        maxiter : maximum number of optimizer iterations for each fit,
                  increase if your model isn't converging

    Return:
        Prints one line per fitted candidate.
        Returns a dataframe holding the single candidate with the lowest
        AIC, with columns area, product, p, d, q, Ps, Ds, Qs, Ss, pdq,
        pdqs, aic. The dataframe is empty when params is empty.
    '''
    result_columns = ['area', 'product', 'p', 'd', 'q',
                      'Ps', 'Ds', 'Qs', 'Ss', 'pdq', 'pdqs', 'aic']

    # Iterate the candidates themselves instead of indexing with
    # range(len(params)), which only works when the keys are 0..n-1.
    candidates = params.values() if isinstance(params, dict) else params

    # Run a grid search over the candidate orders and record each AIC.
    ans = []
    for order, seasonal_order in candidates:
        mod = SARIMAX(ts,
                      order=order,
                      seasonal_order=seasonal_order,
                      enforce_stationarity=False,
                      enforce_invertibility=False
                      )

        # Forward maxiter so the argument actually takes effect; without it
        # fit() silently falls back to its own (much smaller) default.
        output = mod.fit(maxiter=maxiter)

        p_, d_, q_ = order
        ps_, ds_, qs_, ss_ = seasonal_order
        ans.append([area, col, p_, d_, q_, ps_, ds_, qs_, ss_,
                    order, seasonal_order, output.aic])
        print('SARIMAX {} x {} 12 : parameters {},{} AIC={} '.format(
            area, col, order, seasonal_order, output.aic))

    # Convert into dataframe
    ans_df = pd.DataFrame(ans, columns=result_columns)
    if ans_df.empty:
        # Nothing was fitted; return the empty, correctly-labelled frame.
        return ans_df

    # Sort and return top combination. area and product are constant within
    # one call, so this is effectively an ascending sort on aic.
    ans_df = ans_df.sort_values(by=['area', 'product', 'aic'],
                                ascending=True)[0:1]

    return ans_df

# Dataframe to store the best parameters for each area-product combination.
best_param = pd.DataFrame()

# Dataframe to store the predictions for the next `steps` periods for each
# area-product combination.
prediction = pd.DataFrame()

ans = []
par = {}

for t in area:
    data = df[df['areacode'] == t]
    forecast = pd.DataFrame()
    temp = {}
    for c in columns:
        # STEP 1 - Calling function to get best parameters for each
        # area-product combination
        df_ans = sarimax_gridsearch(data[c], t, c, params)

        # STEP 2 - Storing the best parameters for each area-product
        # combination. DataFrame.append was removed in pandas 2.0, so the
        # rows are added with pd.concat instead.
        best_param = pd.concat([best_param, df_ans])
        best_param = best_param.sort_values(
            by=['area', 'product', 'aic'], ascending=True)

        # df_ans is the winning row for (t, c), so read the chosen orders
        # from it once instead of re-filtering best_param for every use.
        best_pdq = df_ans['pdq'].iloc[0]
        best_pdqs = df_ans['pdqs'].iloc[0]

        print('for area {} and product {}'.format(t, c))
        print('best pdq is {}'.format(best_pdq))
        print('best pdqs is {}'.format(best_pdqs))

        # STEP 3 - Building model with best parameters to make forecast for
        # the next `steps` periods
        smx = SARIMAX(
            data[c],
            order=best_pdq,
            seasonal_order=best_pdqs,
            enforce_stationarity=False,
            enforce_invertibility=False
        )

        # NOTE(review): fit() runs with the library's default optimizer
        # settings here; keep them in line with whatever
        # sarimax_gridsearch uses so the refit matches the selected model.
        model = smx.fit()
        predictions = model.get_forecast(
            steps=steps
        ).predicted_mean

        df_forecast = pd.DataFrame(predictions)
        df_forecast.columns = [c]
        temp[c] = (best_pdq, best_pdqs)

        # One forecast column per product, side by side.
        forecast = pd.concat([forecast, df_forecast], axis=1)

    forecast = forecast.fillna(0)
    forecast['area'] = t
    # Storing the best parameters into a dictionary; this is an optional step.
    par[t] = temp

    # STEP 4 - Storing the forecast for each area-product combination
    prediction = pd.concat([prediction, forecast], axis=0)

8 of 10 11/16/2021, 9:22 AM
Auto Tuning multiple Timeseries SARIMAX Model — With ... https://nandakishorej8.medium.com/auto-tuning-multiple-tim...

9 of 10 11/16/2021, 9:22 AM
Auto Tuning multiple Timeseries SARIMAX Model — With ... https://nandakishorej8.medium.com/auto-tuning-multiple-tim...

10 of 10 11/16/2021, 9:22 AM

You might also like