Professional Documents
Culture Documents
California Housing Dataset
California Housing Dataset
California Housing Dataset
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.datasets import fetch_california_housing
sns.set()
%matplotlib inline
A:\Anaconda\lib\site-packages\scipy\__init__.py:155: UserWarning: A
NumPy version >=1.18.5 and <1.25.0 is required for this version of
SciPy (detected version 1.26.3
warnings.warn(f"A NumPy version >={np_minversion} and
<{np_maxversion}"
Loading Data
# Fetch the California housing dataset as pandas objects (as_frame=True),
# downloading it when it is not already cached locally.
house = fetch_california_housing(
    data_home=None,
    download_if_missing=True,
    return_X_y=False,
    as_frame=True,
)
# Work on a copy: assigning the target column directly on house.data would
# also add it to the feature frame stored inside the returned bunch.
df = house.data.copy()
df['MedHouseValue'] = house.target
df.head()
Summary Statistics
# Descriptive statistics for the columns of df (the features plus the
# MedHouseValue target added when the data was loaded).
df.describe()
MedInc VS MedHouseValue
# Scatter of MedInc against MedHouseValue for every row of df.
px.scatter(
    data_frame=df,
    x='MedInc',
    y='MedHouseValue',
)
Housing Value based on Location
# Plot each row at its Longitude/Latitude; marker colour encodes
# MedHouseValue and marker size encodes Population.
px.scatter(
    data_frame=df,
    x="Longitude",
    y="Latitude",
    color='MedHouseValue',
    size='Population',
)
import folium
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
import branca.colormap as cm
# NOTE(review): this cell arrived truncated -- `row`, `color`,
# `map_california` and `chunk` were used without being defined. The sampling
# step, map construction and loop below are a reconstruction; the sample
# size, seed and zoom level are assumptions to confirm against the notebook.

# Draw one marker per sampled row rather than one per row of df, which keeps
# the generated map small. `chunk` is reused by the scatter plots further down.
chunk = df.sample(n=min(1000, len(df)), random_state=42)

# Create a colormap
colormap = plt.cm.viridis
# Scale house values into [0, 1] over the full dataset so that marker colours
# are comparable regardless of which rows were sampled.
norm = mcolors.Normalize(vmin=df['MedHouseValue'].min(),
                         vmax=df['MedHouseValue'].max())

# Centre the map on the mean coordinate of the sampled rows.
map_california = folium.Map(
    location=[chunk['Latitude'].mean(), chunk['Longitude'].mean()],
    zoom_start=6,
)

for _, row in chunk.iterrows():
    # Map the normalised house value to an RGBA tuple, then to a hex string
    # that folium can use as a fill colour.
    color = mcolors.to_hex(colormap(norm(row['MedHouseValue'])))
    folium.CircleMarker([row['Latitude'], row['Longitude']],
                        radius=5,  # Size of the marker
                        fill=True,
                        fill_color=color,
                        color=None,
                        fill_opacity=0.7).add_to(map_california)

# Last expression of the cell: display the map in the notebook.
map_california
MedInc VS MedHouseValue
# Scatter of MedInc against MedHouseValue, restricted to the rows in `chunk`.
px.scatter(
    data_frame=chunk,
    x='MedInc',
    y='MedHouseValue',
)
AveRooms VS MedHouseValue
# Scatter of AveRooms against MedHouseValue, restricted to the rows in `chunk`.
px.scatter(
    data_frame=chunk,
    x='AveRooms',
    y='MedHouseValue',
)
Heatmap
# Pairwise correlation between all columns of df, drawn as a heatmap with
# each cell annotated to two decimal places.
cor = df.corr()
sns.heatmap(
    data=cor,
    annot=True,
    fmt='.2f',
)
Machine Learning
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Features: every column except the target and the three predictors this
# analysis leaves out of the model.
X = df.drop(columns=['MedHouseValue', 'Population', 'AveOccup', 'AveBedrms'])
# Double brackets keep the target as a one-column DataFrame rather than a
# Series.
Y = df[['MedHouseValue']]

# Hold out 40% of the rows for testing; the fixed seed makes the split
# reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    X, Y, test_size=0.4, random_state=42
)

# Fit ordinary least squares on the training split and predict the held-out
# rows.
model = LinearRegression().fit(xtrain, ytrain)
pred = model.predict(xtest)