
Regression with several algorithms

LINEAR REGRESSION
In [28]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [29]: df = pd.read_csv("Advertising2.csv")

In [89]: from sklearn.metrics import mean_squared_error

def ejecutar_modelo(modelo, X_train, y_train, X_test, y_test):
    # Train the model on the training split and evaluate it on the test split
    modelo.fit(X_train, y_train)
    preds = modelo.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, preds))
    print('RMSE: %.4f' % (rmse))
    # Plot the results (disabled)
    #rango_senal = np.arange(0,101)
    #salida = modelo.predict(rango_senal.reshape(-1,1))
    #plt.figure(figsize=(12,6),dpi=200)
    #sns.scatterplot(x='Publicidad',y='Ventas',data=df,color='black')
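The helper scores each model with the root mean squared error, $\mathrm{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2}$, which is expressed in the same units as sales, so lower values are better.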

In [100]: df.tail()

Out[100]:         TV  radio  newspaper  sales
          195   38.2    3.7       13.8    7.6
          196   94.2    4.9        8.1    9.7
          197  177.0    9.3        6.4   12.8
          198  283.6   42.0       66.2   25.5
          199  232.1    8.6        8.7   13.4


In [32]: fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(16,6))

axes[0].plot(df['TV'], df['sales'], 'o')
axes[0].set_ylabel("Sales")
axes[0].set_title("TV spend")

axes[1].plot(df['radio'], df['sales'], 'o')
axes[1].set_title("Radio spend")
axes[1].set_ylabel("Sales")

axes[2].plot(df['newspaper'], df['sales'], 'o')
axes[2].set_title("Newspaper spend")
axes[2].set_ylabel("Sales")
plt.tight_layout();
In [33]: # Relationships between the features
sns.pairplot(df, diag_kind='kde')

Out[33]: <seaborn.axisgrid.PairGrid at 0x2c9919aa2f0>


In [34]: X = df.drop('sales',axis=1)
y = df['sales']

In [35]: from sklearn.model_selection import train_test_split

In [36]: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [37]: from sklearn.linear_model import LinearRegression

In [38]: modelo = LinearRegression()

In [39]: modelo.fit(X_train,y_train)

Out[39]: LinearRegression()

In [40]: modelo.intercept_

Out[40]: 3.1515267680706494

In [41]: modelo.coef_

Out[41]: array([ 0.04469599, 0.1875657 , -0.00032275])
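Combined with the intercept above, the fitted equation is approximately $\widehat{\text{sales}} \approx 3.1515 + 0.0447\cdot\text{TV} + 0.1876\cdot\text{radio} - 0.0003\cdot\text{newspaper}$: radio spend has the largest effect per unit spent, while newspaper spend contributes essentially nothing.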

In [42]: predicciones_test = modelo.predict(X_test)


In [43]: # Show the first 5 test predictions
predicciones_test[0:5]

Out[43]: array([15.74131332, 19.61062568, 11.44888935, 17.00819787, 9.17285676])

In [44]: # Show the first 5 true test values
y_test[:5]

Out[44]: 37 14.7
109 19.8
31 11.9
89 16.7
66 9.5
Name: sales, dtype: float64

In [45]: from sklearn.metrics import mean_absolute_error,mean_squared_error

In [46]: MAE = mean_absolute_error(y_test, predicciones_test)
MSE = mean_squared_error(y_test, predicciones_test)
RMSE = np.sqrt(MSE)

In [47]: MAE

Out[47]: 1.2137457736144808

In [48]: MSE

Out[48]: 2.298716697886378

In [49]: RMSE

Out[49]: 1.5161519375993877

In [50]: df['sales'].mean()

Out[50]: 14.0225
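An RMSE of about 1.52 against a mean of about 14.02 sales units means the linear model is off by roughly 11% of average sales on the test set; this is the baseline the models below try to beat.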
POLYNOMIAL REGRESSION

In [51]: modelPol=LinearRegression()

In [62]: from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

In [69]: canal=make_pipeline(PolynomialFeatures(2), LinearRegression())
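PolynomialFeatures(2) expands the three spend columns into every square and pairwise interaction before the linear fit, turning 3 inputs into 10 columns (bias, the 3 originals, 3 squares, 3 interactions). A minimal sketch to inspect the generated columns, assuming a scikit-learn version that provides get_feature_names_out; this cell is not part of the original run:

In [ ]: # Sketch: list the columns that PolynomialFeatures(2) derives
        # from TV, radio and newspaper.
        poly = PolynomialFeatures(2)
        poly.fit(X_train)
        print(poly.get_feature_names_out())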

In [91]: ejecutar_modelo(canal, X_train, y_train, X_test, y_test)

RMSE: 1.2580

In [92]: canal=make_pipeline(PolynomialFeatures(3), LinearRegression())

In [93]: ejecutar_modelo(canal, X_train, y_train, X_test, y_test)

RMSE: 0.5803

In [94]: canal=make_pipeline(PolynomialFeatures(4), LinearRegression())
ejecutar_modelo(canal, X_train, y_train, X_test, y_test)

RMSE: 1.2580
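The test error falls from degree 2 to degree 3 and rises again at degree 4, the usual sign of overfitting once the model becomes too flexible. A short sketch, added here for illustration, that sweeps the degree in one loop:

In [ ]: # Sketch: compare test RMSE across polynomial degrees to see
        # where it bottoms out on this particular train/test split.
        for grado in [1, 2, 3, 4, 5]:
            canal = make_pipeline(PolynomialFeatures(grado), LinearRegression())
            ejecutar_modelo(canal, X_train, y_train, X_test, y_test)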

KNN
In [95]: from sklearn.neighbors import KNeighborsRegressor

In [96]: valores_k = [1, 5, 10, 15]
for n in valores_k:
    modelo_knn = KNeighborsRegressor(n_neighbors=n)
    ejecutar_modelo(modelo_knn, X_train, y_train, X_test, y_test)

RMSE: 1.6936
RMSE: 1.5412
RMSE: 1.9077
RMSE: 2.2057
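KNN is distance-based, and the columns sit on very different scales (TV spend reaches about 300 while radio stays under 50), so TV dominates the neighbor search. A sketch, added as an illustration rather than part of the original run, that standardizes the features first:

In [ ]: # Sketch: put a StandardScaler in front of KNN so every feature
        # contributes comparably to the distance computation.
        from sklearn.preprocessing import StandardScaler
        for n in valores_k:
            canal_knn = make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=n))
            ejecutar_modelo(canal_knn, X_train, y_train, X_test, y_test)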

DECISION TREES
In [97]: from sklearn.tree import DecisionTreeRegressor
modelo_arbolDeci=DecisionTreeRegressor()

In [98]: ejecutar_modelo(modelo_arbolDeci, X_train, y_train, X_test, y_test)

RMSE: 1.0120

In [99]: modelo_arbolDeci.get_n_leaves()

Out[99]: 132
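With 132 leaves for only 140 training rows (70% of 200), the unpruned tree is close to memorizing the training set. A sketch, added for illustration, that caps the depth and re-evaluates:

In [ ]: # Sketch: limit tree depth to curb overfitting and compare the
        # test RMSE against the unpruned tree above.
        for profundidad in [2, 3, 4, 5]:
            modelo_podado = DecisionTreeRegressor(max_depth=profundidad, random_state=101)
            ejecutar_modelo(modelo_podado, X_train, y_train, X_test, y_test)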

Conclusions

Based on the evaluation metrics, it can be concluded that the best prediction algorithm for this case is polynomial regression of degree 3. For the random forest algorithm, the scarcity of data makes its use pointless, since the decision tree already obtains better results on this reduced dataset.
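The conclusions refer to random forests, although no such run appears above. A minimal sketch of how one could be evaluated with the same helper; the hyperparameters are illustrative assumptions, not values from the original notebook:

In [ ]: # Sketch: evaluate a random forest with the same helper so its
        # RMSE is directly comparable to the models above.
        from sklearn.ensemble import RandomForestRegressor
        modelo_bosque = RandomForestRegressor(n_estimators=100, random_state=101)
        ejecutar_modelo(modelo_bosque, X_train, y_train, X_test, y_test)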

