Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 3

In [1]: #Assignment 1: Manuel Tapia

Jupyter QtConsole 5.0.3


Python 3.8.8 (default, Apr 13 2021, 15:08:03) [MSC v.1916 64 bit (AMD64)]
Type 'copyright', 'credits' or 'license' for more information
IPython 7.22.0 -- An enhanced Interactive Python. Type '?' for help.

In [1]: #Assignment 1: Manuel Tapia

In [2]: import pandas as pd


   ...: url1 = 'https://raw.githubusercontent.com/cinnData/DataSci/main/'
   ...: url2 = '5.%20Querying%20data%20in%20Pandas/airbnb.csv'
   ...: url = url1 + url2
   ...: df = pd.read_csv(url, index_col=0);

In [3]: df1=df[(df['property_type'] == 'Apartment') & (df['room_type'] == 'Entire home/apt')];


#creating a new DataFrame (df1) that keeps only apartments rented as an entire home/apt

In [6]: df1.info() #the number of listings is 7666.


<class 'pandas.core.frame.DataFrame'>
Int64Index: 7666 entries, 18666 to 34682586
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 host_id 7666 non-null int64
1 host_since 7664 non-null object
2 name 7664 non-null object
3 neighbourhood 7666 non-null object
4 property_type 7666 non-null object
5 room_type 7666 non-null object
6 bedrooms 7664 non-null float64
7 price 7666 non-null int64
8 number_of_reviews 7666 non-null int64
9 review_scores_rating 5845 non-null float64
dtypes: float64(2), int64(3), object(5)
memory usage: 658.8+ KB

In [7]: df2=df[(df['bedrooms'] >=1) & (df['bedrooms'] <=3)]; #question 2: new conditions (between 1 and 3 bedrooms)

In [10]: df2.info() #the number of listings is 16794.


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16794 entries, 18666 to 34686079
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 host_id 16794 non-null int64
1 host_since 16787 non-null object
2 name 16778 non-null object
3 neighbourhood 16794 non-null object
4 property_type 16794 non-null object
5 room_type 16794 non-null object
6 bedrooms 16794 non-null float64
7 price 16794 non-null int64
8 number_of_reviews 16794 non-null int64
9 review_scores_rating 12908 non-null float64
dtypes: float64(2), int64(3), object(5)
memory usage: 1.4+ MB

In [11]: df2.groupby(by='bedrooms')['price'].median().round() #question 3: median price as a function of the number of bedrooms. More bedrooms means a higher price, but the data contain outliers that make the average price higher than the median.
Out[11]:
bedrooms
1.0 45
2.0 95
3.0 120
Name: price, dtype: int64

In [18]: url_adit='https://raw.githubusercontent.com/cinnData/DataSci/main/5.%20Querying%20data%20in%20Pandas/neighbourhoods.csv';

In [19]: df3 = pd.read_csv(url_adit, index_col=0);

In [20]: df4=pd.merge(df2,df3); #question5: merging the two DataFrames, taking the column neighbourhood as the reference

In [21]: df4.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 16794 entries, 0 to 16793
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 host_id 16794 non-null int64
1 host_since 16787 non-null object
2 name 16778 non-null object
3 neighbourhood 16794 non-null object
4 property_type 16794 non-null object
5 room_type 16794 non-null object
6 bedrooms 16794 non-null float64
7 price 16794 non-null int64
8 number_of_reviews 16794 non-null int64
9 review_scores_rating 12908 non-null float64
dtypes: float64(2), int64(3), object(5)
memory usage: 1.4+ MB

In [27]: df4.groupby(['neighbourhood_group','bedrooms'])['price'].median().round()
Out[27]: #Question 5: We can see that the most expensive neighbourhoods are Eixample, Sant Martí and Gràcia, for all types of apartments (1, 2 or 3 bedrooms)
In [31]: df4.groupby(['neighbourhood_group','bedrooms'])['price'].median().unstack().round()
#different view of the table, using unstack.
Out[31]:

In [32]:

You might also like