In [1]: #Assignment 1: Manuel Tapia

In [2]: import pandas as pd

   ...: # The CSV location is assembled from a base part (url1) and a relative path (url2).
   ...: # NOTE(review): url1 is an empty string here, so url equals url2 alone - confirm the intended base URL.
   ...: url1 = ''
   ...: url2 = '5.%20Querying%20data%20in%20Pandas/airbnb.csv'
   ...: url = url1 + url2
   ...: # Read the CSV into a DataFrame, using its first column as the index.
   ...: df = pd.read_csv(url, index_col=0);

In [3]: df1=df[(df['property_type'] == 'Apartment') & (df['room_type'] == 'Entire home/apt')];

#creating a new DF with that filter

In [6]: #there are 7666 listings (entries).

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7666 entries, 18666 to 34682586
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 host_id 7666 non-null int64
1 host_since 7664 non-null object
2 name 7664 non-null object
3 neighbourhood 7666 non-null object
4 property_type 7666 non-null object
5 room_type 7666 non-null object
6 bedrooms 7664 non-null float64
7 price 7666 non-null int64
8 number_of_reviews 7666 non-null int64
9 review_scores_rating 5845 non-null float64
dtypes: float64(2), int64(3), object(5)
memory usage: 658.8+ KB

In [7]: df2 = df[df['bedrooms'].between(1, 3)];  #question 2: new conditions, bedrooms from 1 to 3 (both bounds included)

In [10]: #there are 16794 listings (entries).

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16794 entries, 18666 to 34686079
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 host_id 16794 non-null int64
1 host_since 16787 non-null object
2 name 16778 non-null object
3 neighbourhood 16794 non-null object
4 property_type 16794 non-null object
5 room_type 16794 non-null object
6 bedrooms 16794 non-null float64
7 price 16794 non-null int64
8 number_of_reviews 16794 non-null int64
9 review_scores_rating 12908 non-null float64
dtypes: float64(2), int64(3), object(5)
memory usage: 1.4+ MB

In [11]: df2.groupby(by='bedrooms')['price'].median().round() #question3: median price as a

#function of the number of bedrooms: more bedrooms mean a higher price, but the data contain
#outliers that make the average price higher than the median
1.0 45
2.0 95
3.0 120
Name: price, dtype: int64

In [18]: url_adit=''  # TODO: the URL is missing from this export (the string literal was left unterminated); restore the CSV location before running

In [19]: df3 = pd.read_csv(url_adit, index_col=0);  # read the additional CSV, using its first column as the index

In [20]: df4=pd.merge(df2,df3); #question5: merging the two DataFrames, taking the neighbourhood

#column as the reference

In [21]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 16794 entries, 0 to 16793
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 host_id 16794 non-null int64
1 host_since 16787 non-null object
2 name 16778 non-null object
3 neighbourhood 16794 non-null object
4 property_type 16794 non-null object
5 room_type 16794 non-null object
6 bedrooms 16794 non-null float64
7 price 16794 non-null int64
8 number_of_reviews 16794 non-null int64
9 review_scores_rating 12908 non-null float64
dtypes: float64(2), int64(3), object(5)
memory usage: 1.4+ MB

In [27]: df4.groupby(['neighbourhood_group','bedrooms'])['price'].median().round()
Out[27]: #Question 5: We can see that the most expensive neighborhoods are Eixample,
#San Marti and Gracia, for all types of apartments (1, 2 or 3 bedrooms)
In [31]: df4.groupby(['neighbourhood_group','bedrooms'])['price'].median().unstack().round()
#different view of the table, using unstack.

In [32]:

