Download as pdf or txt
Download as pdf or txt
You are on page 1of 12

1/5/24, 20:40 Resolución Caso 2 - Milagro

ANEXO 4 - CASO MILAGROS


Constanza Escobar
IND 3100
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.eval_measures import rmse
from scipy import stats
from sklearn.model_selection import train_test_split
from typing import Union
%matplotlib inline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [2]:
import matplotlib.pyplot as plt
from scipy.interpolate import interpn
import numpy as np
from sklearn.linear_model import LinearRegression,Lasso
import matplotlib.pyplot as plt
def R2(predictions,resp):
N=1
try:
N=np.shape(resp)[1]
prom=np.zeros(N)
for i in range(N):
prom[i]=np.mean(resp[:,i])
Error=[]
SST=0#Varianza total
SSReg=0#Varianza explicada
for i in range(0,len(resp)):
Error.append(resp[i]-(predictions[i]))
SST+=((resp[i]) - prom)**2
SSReg+=((resp[i])-(predictions[i]))**2
return 1-SSReg/SST
except:
prom=np.mean(resp)
Error=[]
SST=0#Varianza total
SSReg=0#Varianza explicada
for i in range(0,len(resp)):
Error.append(resp[i]-(predictions[i]))
SST+=((resp[i]) - prom)**2
SSReg+=((resp[i])-(predictions[i]))**2
return 1-SSReg/SST

def plot_figura(x,y):

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 1/12


1/5/24, 20:40 Resolución Caso 2 - Milagro
##asumo que x es la prediccion e y el real
lm=LinearRegression()
lm.fit(x.reshape(len(x),1),y.reshape(len(y),1))
a=lm.coef_[0,0]
b=lm.intercept_[0]
leyenda=['y_pred=y_real' ,'R2='+'{:.2f}'.format( R2(x,y) )]
plt.plot([np.min(x),np.max(x)],[np.min(x),np.max(x)],'r')
plt.scatter(x,y)
plt.legend(leyenda)
plt.xlabel('Prediccion')
plt.ylabel('Real')
plt.title('Validacion')
return

In [3]:
import pandas as pd
# Cargamos los datos desde Drive
milagro_df = pd.read_excel('Milagro.xlsx', sheet_name = 'SITE-DATA-TRAIN')
milagro_test = pd.read_excel('Milagro.xlsx', sheet_name = 'SITE-DATA-TEST')
store_48 = pd.read_excel('Milagro.xlsx', sheet_name = 'SITE-DATA-48-STORES-UNDER

/opt/anaconda3/lib/python3.9/site-packages/openpyxl/worksheet/_reader.py:312: Us
erWarning: Unknown extension is not supported and will be removed
warn(msg)

In [4]:
milagro_df.head()

Out[4]: STORENUM STATE ANNUAL LCI NEARCOMP NEARMIL FREESTAND GINI HO


PROFIT
0 1 AZ 414343.201689 5.989973 2 5.3 0 0.3889 95
1 2 AZ 514643.961909 8.057567 6 13.1 0 0.2434 77
2 3 AZ 443096.431605 6.267259 0 30.2 0 0.3179 84
3 4 AZ 495031.136712 8.566326 0 29.4 0 0.4132 142
4 5 AZ 962170.030398 4.077841 6 10.1 0 0.4911 116
5 rows × 30 columns
In [5]:
milagro_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 30 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 STORENUM 374 non-null int64
1 STATE 374 non-null object
2 ANNUAL PROFIT 374 non-null float64
3 LCI 374 non-null float64
4 NEARCOMP 374 non-null int64
5 NEARMIL 374 non-null float64
6 FREESTAND 374 non-null int64
7 GINI 374 non-null float64
8 HOUSEMED 374 non-null float64

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 2/12


1/5/24, 20:40 Resolución Caso 2 - Milagro
9 SQFT 374 non-null int64
10 INTERSECT 374 non-null int64
11 POP 374 non-null int64
12 AGEMED 374 non-null float64
13 NONCITZN 374 non-null float64
14 AGINC 374 non-null float64
15 MEDINC 374 non-null float64
16 NOHS 374 non-null float64
17 HSGRAD 374 non-null float64
18 SOMECOL 374 non-null float64
19 COLGRAD 374 non-null float64
20 POSTGRAD 374 non-null float64
21 COM0 374 non-null float64
22 COM15 374 non-null float64
23 COM30 374 non-null float64
24 COM60 374 non-null float64
25 TRDRIVE 374 non-null float64
26 TRPUBLIC 374 non-null float64
27 TRWALK 374 non-null float64
28 TRHOME 374 non-null float64
29 TROTHER 374 non-null float64
dtypes: float64(23), int64(6), object(1)
memory usage: 87.8+ KB

In [6]:
milagro_df.columns

Index(['STORENUM', 'STATE', 'ANNUAL PROFIT', 'LCI', 'NEARCOMP', 'NEARMIL',


Out[6]:
'FREESTAND', 'GINI', 'HOUSEMED', 'SQFT', 'INTERSECT', 'POP', 'AGEMED',
'NONCITZN', 'AGINC', 'MEDINC', 'NOHS', 'HSGRAD', 'SOMECOL', 'COLGRAD',
'POSTGRAD', 'COM0', 'COM15', 'COM30', 'COM60', 'TRDRIVE', 'TRPUBLIC',
'TRWALK', 'TRHOME', 'TROTHER'],
dtype='object')

In [7]:
y = milagro_df['ANNUAL PROFIT']

In [8]:
candidatas = ['LCI', 'NEARCOMP', 'NEARMIL',
'FREESTAND', 'GINI', 'HOUSEMED', 'SQFT', 'INTERSECT', 'POP', 'AGEMED',
'NONCITZN', 'AGINC', 'MEDINC', 'NOHS', 'HSGRAD', 'SOMECOL', 'COLGRAD',
'POSTGRAD', 'COM0', 'COM15', 'COM30', 'COM60', 'TRDRIVE', 'TRPUBLIC',
'TRWALK', 'TRHOME', 'TROTHER']

In [9]:
import seaborn as sns
corr = milagro_df.corr()
sns.set_theme(rc={'figure.figsize':(20,30)})

sns.heatmap(corr, cmap="Blues", annot=True)

<AxesSubplot:>
Out[9]:

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 3/12


1/5/24, 20:40 Resolución Caso 2 - Milagro

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 4/12


1/5/24, 20:40 Resolución Caso 2 - Milagro

In [10]:
def Step_Forward_Feature_Selection(dataframe, candidatas,target, selected_vars =
df = dataframe
i=0
grafico = {} # Diccionario donde se irán guardando las variables seleccionad
while True:

#Variables que quedan para probar


restantes = list(set(candidatas) - set(selected_vars)) #Conjunto de posi

if restantes != []: # entramos al ciclo mientras queden variables por pr

R2 = pd.Series(index = restantes) # serie donde guardaremos los valo


selected_vars = list(selected_vars) # lista de variables seleccionad

# En este ciclo for entrenamos los modelos de regresión lineal con


for var in restantes: # recorremos las variables que quedan por prob
# utilizando LinearRegression

lr = LinearRegression()
lr.fit(df[selected_vars + [var]], target) #evaluamos el modelo p
y_predict = lr.predict(df[selected_vars + [var]])
r2 = r2_score(target,y_predict) # Calculamos el r2 del modelo
R2[var] = r2 # guardamos el r2 y la variable

max_R2 =R2.max() # calculamos el valor mínimo del conjunto de MAE


max_var = R2.idxmax() # indice de la variable que tiene el mínimo va
grafico['+'.join(selected_vars) + '+' + max_var] = max_R2 # guardam
selected_vars.append(max_var) # guardamos la variale

else : # cuando ya no queden variables restante en el conjunto de posibl


sns.set_theme(rc={'figure.figsize':(10,7)})
grafico = pd.DataFrame(grafico.items(), columns = ['var','values'])
plt.xticks(rotation= 90)
plt.plot(grafico['var'], grafico['values'], 'b-')
plt.title('Gráfico de R2 vs Variables Seleccionadas')
a = grafico['values'].max()
x = grafico['var'].loc[grafico['values'] == a]

print(grafico)

return print(f"La variables seleccionadas corresponden a {x}")

break

In [11]:
Step_Forward_Feature_Selection(dataframe = milagro_df, candidatas = candidatas,

/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 5/12
1/5/24, 20:40 Resolución Caso 2 - Milagro
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 6/12
1/5/24, 20:40 Resolución Caso 2 - Milagro
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 7/12
1/5/24, 20:40 Resolución Caso 2 - Milagro
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
var values
0 +AGINC 0.753905
1 AGINC+FREESTAND 0.869388
2 AGINC+FREESTAND+TRDRIVE 0.892886
3 AGINC+FREESTAND+TRDRIVE+POP 0.920259
4 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP 0.925679
5 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD 0.931897
6 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+SQFT 0.937190
7 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.941041
8 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.943129
9 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.945083
10 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.945905
11 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.946338
12 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.946828
13 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947177
14 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947450
15 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947607
16 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947774
17 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947825
18 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947847
19 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947863
20 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947870
21 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947874
22 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
23 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
24 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
25 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
26 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
La variables seleccionadas corresponden a 23 AGINC+FREESTAND+TRDRIVE+POP+NEA
RCOMP+COLGRAD+S...
24 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S...
25 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S...
Name: var, dtype: object

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 8/12


1/5/24, 20:40 Resolución Caso 2 - Milagro

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 9/12


1/5/24, 20:40 Resolución Caso 2 - Milagro

Podemos ver que el r2 del modelo aumenta a medida que se van sumando variables predictoras
sin embargo se puede ver que llega a un plató cuando de 0.94 (con 8 variables predictoras). Sin
embargo, las primeras 4 que más explican la variabilidad son:
AGINC+FREESTAND+TRDRIVE+POP con un r2 de 0.925.
In [75]:
predictores = ['AGINC','FREESTAND','TRDRIVE','POP']

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 10/12


1/5/24, 20:40 Resolución Caso 2 - Milagro

In [76]:
x_train = milagro_df[predictores]
y_train = y

In [77]:
# Entrenamiento del modelo con las variables seleccionadasX

x_train = sm.add_constant(x_train, prepend=True).rename(columns={'const':'interc


modelo = sm.OLS(y_train, x_train)
modelo = modelo.fit()
print(modelo.summary())

OLS Regression Results


==============================================================================
Dep. Variable: ANNUAL PROFIT R-squared: 0.920
Model: OLS Adj. R-squared: 0.919
Method: Least Squares F-statistic: 1065.
Date: Wed, 01 May 2024 Prob (F-statistic): 3.92e-201
Time: 18:48:13 Log-Likelihood: -4923.0
No. Observations: 374 AIC: 9856.
Df Residuals: 369 BIC: 9876.
Df Model: 4
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
intercept 5.804e+05 3.31e+04 17.536 0.000 5.15e+05 6.46e+05
AGINC 0.0026 7.1e-05 36.551 0.000 0.002 0.003
FREESTAND 2.561e+05 2.27e+04 11.298 0.000 2.11e+05 3.01e+05
TRDRIVE -5.156e+05 4.03e+04 -12.806 0.000 -5.95e+05 -4.36e+05
POP 58.3202 5.182 11.255 0.000 48.130 68.510
==============================================================================
Omnibus: 36.124 Durbin-Watson: 2.030
Prob(Omnibus): 0.000 Jarque-Bera (JB): 144.212
Skew: 0.274 Prob(JB): 4.84e-32
Kurtosis: 5.992 Cond. No. 1.32e+09
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
[2] The condition number is large, 1.32e+09. This might indicate that there are
strong multicollinearity or other numerical problems.
/opt/anaconda3/lib/python3.9/site-packages/statsmodels/tsa/tsatools.py:142: Futu
reWarning: In a future version of pandas all arguments of concat except for the
argument 'objs' will be keyword-only
x = pd.concat(x[::order], 1)

In [78]:
x_train = milagro_df[predictores]
x_test = milagro_test[predictores]
y_test = milagro_test['ANNUAL PROFIT']
x_test_store48 = store_48[predictores]

In [79]:
x_test = sm.add_constant(x_test, prepend=True).rename(columns={'const':'intercep
y_pred = modelo.predict(x_test)

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 11/12


1/5/24, 20:40 Resolución Caso 2 - Milagro

/opt/anaconda3/lib/python3.9/site-packages/statsmodels/tsa/tsatools.py:142: Futu
reWarning: In a future version of pandas all arguments of concat except for the
argument 'objs' will be keyword-only
x = pd.concat(x[::order], 1)

In [80]:
x_test_store48 = sm.add_constant(x_test_store48, prepend=True).rename(columns={'
y_pred_store48 = modelo.predict(x_test_store48)
y_pred_store48.sum().round(2)

33983675.89
Out[80]:

In [74]:
sns.set_theme(rc={'figure.figsize':(8,7)})

plot_figura(y_pred.values, y_test.values)

file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 12/12

You might also like