Download as pdf or txt
Download as pdf or txt
You are on page 1of 5

NICOLE VINCES SEMINARIO IPR-S-VE-7-6

Utilizar el código visto en clase de la predicción de casas de Boston, pero esta vez se utilizará
el dataset de las casas de California. Este dataset se puede obtener de la siguiente manera:
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

# importando pandas, numpy y matplotlib


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
# Load the housing data from a CSV file at a fixed local path
# (the data is read from disk here, not fetched through scikit-learn).
url = "/content/housing.csv"
df = pd.read_csv(url)

# Report the size of the loaded table.
nRow = len(df.index)
nCol = len(df.columns)
print(f'There are {nRow} rows and {nCol} columns')
There are 20640 rows and 10 columns

# Quick inspection of the loaded frame. These are bare expressions: outside a
# notebook only df.info() produces output, because it prints its summary itself.
# Leading rows of the table.
df.head()
# Column names, non-null counts and dtypes (printed by pandas).
df.info()
# Summary statistics of the columns.
df.describe()
# Per-column dtypes.
df.dtypes
<class 'pandas.core.frame.DataFrame'>

RangeIndex: 20640 entries, 0 to 20639

Data columns (total 10 columns):


# Column Non-Null Count Dtype
--- ------ -------------- -----
0 longitude 20640 non-null float64
1 latitude 20640 non-null float64
2 housing_median_age 20640 non-null float64
3 total_rooms 20640 non-null float64
4 total_bedrooms 20433 non-null float64
5 population 20640 non-null float64
6 households 20640 non-null float64
7 median_income 20640 non-null float64
8 median_house_value 20640 non-null float64
9 ocean_proximity 20640 non-null object
dtypes: float64(9), object(1)

memory usage: 1.6+ MB


longitude float64
latitude float64
housing_median_age float64
total_rooms float64
total_bedrooms float64
population float64
households float64
median_income float64
median_house_value float64
ocean_proximity object
dtype: object
# One histogram per column that DataFrame.hist can plot, 30 bins each,
# drawn on a single 16x8 figure.
hist_options = {"bins": 30, "color": "DarkCyan", "figsize": (16, 8)}
df.hist(**hist_options)
plt.show()

# Share of rows in each ocean_proximity category, shown as a pie chart.
values_ocean = df["ocean_proximity"].value_counts()
# autopct must be a single string literal; the original had it broken across
# two lines, which is a syntax error.
values_ocean.plot.pie(subplots=True, figsize=(12, 8), autopct='%1.2f%%')
plt.title("ocean_proximity")
plt.legend(values_ocean.index)
# Annotated heatmap of the pairwise correlations between numeric columns.
plt.figure(figsize=(14, 8))
# The frame also holds the text column ocean_proximity; recent pandas versions
# raise when corr() meets non-numeric data, so restrict to numeric columns
# explicitly (this also works on older pandas versions).
numeric_corr = df.select_dtypes(include="number").corr()
sns.heatmap(numeric_corr, annot=True, cmap='mako')
plt.show()

# Grid of pairwise relationship plots for the columns of df (seaborn pairplot).
sns.pairplot(df)
# Bucket median_income into six labelled ranges and count the rows per bucket.
lab = ["0 - 2", "2 - 4", "4 - 6", "6 - 8", "8 - 10", "10>"]
# The last bin is open-ended (np.inf) so every income above 10 lands in "10>".
# The keyword `labels` was split across two lines in the original.
income_bins = pd.cut(
    df["median_income"],
    bins=[0, 2, 4, 6, 8, 10, np.inf],
    labels=lab,
)

sns.countplot(x=income_bins, palette='YlGnBu')

# Histogram of the target column with a kernel-density curve overlaid.
# `kde=True` was split across several lines in the original.
sns.displot(
    data=df,
    x="median_house_value",
    kind="hist",
    kde=True,
    color='DarkCyan',
)
# Target vector: the median house value of each row.
y = df['median_house_value'].values

# Feature matrix: every remaining column. Text/categorical columns
# (ocean_proximity in this dataset) are one-hot encoded as floats, because the
# scaler and the linear model only accept numeric input.
features = pd.get_dummies(df.drop(['median_house_value'], axis=1), dtype=float)
X = features.values

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fill missing feature values (total_bedrooms has nulls) with the column
# medians of the TRAINING split only, so nothing from the test rows leaks
# into preprocessing.
train_medians = np.nanmedian(x_train, axis=0)
x_train = np.where(np.isnan(x_train), train_medians, x_train)
x_test = np.where(np.isnan(x_test), train_medians, x_test)

# Fit the scaler on the training split and reuse those statistics for the test
# split. Re-fitting on the test data (as the original did) scales the two
# splits differently and leaks test information.
rob_scaler = RobustScaler()
x_train = rob_scaler.fit_transform(x_train)
x_test = rob_scaler.transform(x_test)
# Fit a linear regression on the scaled training split
# (fit() returns the estimator itself, so construction and fitting are chained).
lr = linear_model.LinearRegression().fit(x_train, y_train)

# Score on the training data; the value is neither stored nor printed here.
lr.score(x_train, y_train)

# Show the learned parameters.
print("The Coefficients are : ", lr.coef_)
print("\nThe Intercept: ", lr.intercept_)
# Predict the held-out rows and put actual vs. predicted values side by side.
y_predicted = lr.predict(x_test)
# NOTE: this rebinds `df`, replacing the original dataset frame with the
# comparison table; the name is kept because the plot below reads it.
# `.round(2)` was split across two lines in the original.
df = pd.DataFrame({"Y_test": y_test, "Y_predicted": y_predicted.round(2)})
df.head(10)

# Line plot of the first 50 test rows: actual against predicted values.
plt.figure(figsize=(10, 8))
plt.plot(df[:50])
plt.legend(["Actual", "Predicted"])

# Residuals on the test split: actual value minus predicted value.
Err = y_test - y_predicted

# Histogram of the residuals on a 10x6 figure.
_, error_ax = plt.subplots(figsize=(10, 6))
sns.histplot(Err, ax=error_ax)
error_ax.set_xlabel('Error');

You might also like