Professional Documents
Culture Documents
Resolución Caso 2 - Milagro
Resolución Caso 2 - Milagro
In [2]:
import matplotlib.pyplot as plt
from scipy.interpolate import interpn
import numpy as np
from sklearn.linear_model import LinearRegression,Lasso
import matplotlib.pyplot as plt
def R2(predictions, resp):
    """Return the coefficient of determination (R^2) of ``predictions`` vs ``resp``.

    The score is ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    residuals ``resp - predictions`` and ``SS_tot`` is the sum of squared
    deviations of ``resp`` from its own mean.

    Parameters
    ----------
    predictions : array-like
        Predicted values, shape ``(n,)`` or ``(n, k)``.
    resp : array-like
        Observed values, shape ``(n,)`` or ``(n, k)``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar when ``resp`` is 1-D; an array of shape ``(k,)`` holding one
        score per column when ``resp`` is 2-D.

    Notes
    -----
    Inputs are converted with ``np.asarray``, so pandas objects are handled
    positionally regardless of their index labels. A constant ``resp`` makes
    ``SS_tot`` zero and yields ``nan``/``-inf`` (with a NumPy warning).
    """
    observed = np.asarray(resp, dtype=float)
    predicted = np.asarray(predictions, dtype=float)

    # A single prediction vector scored against several response columns:
    # align it as a column so it is compared row by row with every column.
    if observed.ndim == 2 and predicted.ndim == 1:
        predicted = predicted[:, np.newaxis]

    # Reducing over axis 0 covers both layouts: 1-D input collapses to a
    # scalar, 2-D input produces one value per column.
    ss_res = np.sum((observed - predicted) ** 2, axis=0)  # unexplained variation
    ss_tot = np.sum((observed - observed.mean(axis=0)) ** 2, axis=0)  # total variation
    return 1 - ss_res / ss_tot
def plot_figura(x,y):
In [3]:
import pandas as pd
# Load the data (originally noted as coming from Drive; the path used here is
# relative to the working directory, so 'Milagro.xlsx' must sit next to the notebook).
# The training and test sets live in separate sheets of the same workbook.
milagro_df = pd.read_excel('Milagro.xlsx', sheet_name = 'SITE-DATA-TRAIN')
milagro_test = pd.read_excel('Milagro.xlsx', sheet_name = 'SITE-DATA-TEST')
store_48 = pd.read_excel('Milagro.xlsx', sheet_name = 'SITE-DATA-48-STORES-UNDER
/opt/anaconda3/lib/python3.9/site-packages/openpyxl/worksheet/_reader.py:312: Us
erWarning: Unknown extension is not supported and will be removed
warn(msg)
In [4]:
milagro_df.head()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 30 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 STORENUM 374 non-null int64
1 STATE 374 non-null object
2 ANNUAL PROFIT 374 non-null float64
3 LCI 374 non-null float64
4 NEARCOMP 374 non-null int64
5 NEARMIL 374 non-null float64
6 FREESTAND 374 non-null int64
7 GINI 374 non-null float64
8 HOUSEMED 374 non-null float64
In [6]:
milagro_df.columns
In [7]:
# Response variable: the 'ANNUAL PROFIT' column of the training sheet.
y = milagro_df['ANNUAL PROFIT']
In [8]:
# Candidate predictor columns handed to the forward feature selection.
candidatas = [
    'LCI', 'NEARCOMP', 'NEARMIL', 'FREESTAND', 'GINI', 'HOUSEMED',
    'SQFT', 'INTERSECT', 'POP', 'AGEMED', 'NONCITZN',
    'AGINC', 'MEDINC',
    'NOHS', 'HSGRAD', 'SOMECOL', 'COLGRAD', 'POSTGRAD',
    'COM0', 'COM15', 'COM30', 'COM60',
    'TRDRIVE', 'TRPUBLIC', 'TRWALK', 'TRHOME', 'TROTHER',
]
In [9]:
import seaborn as sns
# Correlation matrix over the numeric columns only. The frame also holds the
# text column STATE (dtype object); pandas >= 2.0 no longer drops such columns
# silently in DataFrame.corr() and raises instead, so select them out explicitly.
corr = milagro_df.select_dtypes(include=['number', 'bool']).corr()
sns.set_theme(rc={'figure.figsize':(20,30)})
<AxesSubplot:>
Out[9]:
In [10]:
def Step_Forward_Feature_Selection(dataframe, candidatas,target, selected_vars =
df = dataframe
i=0
grafico = {} # Diccionario donde se irán guardando las variables seleccionad
while True:
lr = LinearRegression()
lr.fit(df[selected_vars + [var]], target) #evaluamos el modelo p
y_predict = lr.predict(df[selected_vars + [var]])
r2 = r2_score(target,y_predict) # Calculamos el r2 del modelo
R2[var] = r2 # guardamos el r2 y la variable
print(grafico)
break
In [11]:
Step_Forward_Feature_Selection(dataframe = milagro_df, candidatas = candidatas,
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 5/12
1/5/24, 20:40 Resolución Caso 2 - Milagro
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 6/12
1/5/24, 20:40 Resolución Caso 2 - Milagro
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
file:///Users/constanzaescobarsalas/Downloads/Resolución Caso 2 - Milagro.html 7/12
1/5/24, 20:40 Resolución Caso 2 - Milagro
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
/var/folders/64/cfyl69qd61dbq5r4h0w__0z40000gn/T/ipykernel_4589/295797521.py:12:
DeprecationWarning: The default dtype for empty Series will be 'object' instead
of 'float64' in a future version. Specify a dtype explicitly to silence this war
ning.
R2 = pd.Series(index = restantes) # serie donde guardaremos los valores r2
var values
0 +AGINC 0.753905
1 AGINC+FREESTAND 0.869388
2 AGINC+FREESTAND+TRDRIVE 0.892886
3 AGINC+FREESTAND+TRDRIVE+POP 0.920259
4 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP 0.925679
5 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD 0.931897
6 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+SQFT 0.937190
7 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.941041
8 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.943129
9 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.945083
10 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.945905
11 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.946338
12 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.946828
13 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947177
14 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947450
15 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947607
16 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947774
17 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947825
18 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947847
19 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947863
20 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947870
21 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947874
22 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
23 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
24 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
25 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
26 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S... 0.947877
La variables seleccionadas corresponden a 23 AGINC+FREESTAND+TRDRIVE+POP+NEA
RCOMP+COLGRAD+S...
24 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S...
25 AGINC+FREESTAND+TRDRIVE+POP+NEARCOMP+COLGRAD+S...
Name: var, dtype: object
Podemos ver que el r2 del modelo aumenta a medida que se van sumando variables predictoras;
sin embargo, llega a una meseta en torno a 0.94 (con 8 variables predictoras).
Las primeras 4 variables, que son las que más explican la variabilidad, son:
AGINC+FREESTAND+TRDRIVE+POP, con un r2 de 0.920.
In [75]:
# Predictors kept for the final model: the first four variables picked by the
# forward selection.
predictores = ['AGINC','FREESTAND','TRDRIVE','POP']
In [76]:
# Training design matrix (selected predictors only) and its response vector.
x_train = milagro_df[predictores]
y_train = y
In [77]:
# Entrenamiento del modelo con las variables seleccionadasX
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
[2] The condition number is large, 1.32e+09. This might indicate that there are
strong multicollinearity or other numerical problems.
/opt/anaconda3/lib/python3.9/site-packages/statsmodels/tsa/tsatools.py:142: Futu
reWarning: In a future version of pandas all arguments of concat except for the
argument 'objs' will be keyword-only
x = pd.concat(x[::order], 1)
In [78]:
# Build the predictor matrices for every data set using the same column list.
# NOTE(review): x_train is rebuilt here with the same expression as in an
# earlier cell; the repetition looks redundant — confirm before removing.
x_train = milagro_df[predictores]
x_test = milagro_test[predictores]
# Observed response of the test sheet, used to compare against predictions.
y_test = milagro_test['ANNUAL PROFIT']
# Predictors for the separate 48-store sheet.
x_test_store48 = store_48[predictores]
In [79]:
x_test = sm.add_constant(x_test, prepend=True).rename(columns={'const':'intercep
y_pred = modelo.predict(x_test)
/opt/anaconda3/lib/python3.9/site-packages/statsmodels/tsa/tsatools.py:142: Futu
reWarning: In a future version of pandas all arguments of concat except for the
argument 'objs' will be keyword-only
x = pd.concat(x[::order], 1)
In [80]:
x_test_store48 = sm.add_constant(x_test_store48, prepend=True).rename(columns={'
y_pred_store48 = modelo.predict(x_test_store48)
y_pred_store48.sum().round(2)
33983675.89
Out[80]:
In [74]:
# Shrink the default figure size, then plot test-set predictions (first
# argument) against the observed test values (second argument).
# NOTE(review): this cell carries execution count 74 but depends on y_pred and
# y_test, which are assigned in cells numbered 78-79 — the notebook was run out
# of order; verify it still works under Restart & Run All.
sns.set_theme(rc={'figure.figsize':(8,7)})
plot_figura(y_pred.values, y_test.values)