Pandas

4/4/2021 pandas
In [2]:
import pandas as pd
In [1]:
df = pd.read_csv('Tweets.csv')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-1-8cbae2239962> in <module>
----> 1 df = pd.read_csv('Tweets.csv')
NameError: name 'pd' is not defined
In [4]:
df.shape
Out[4]: (14640, 15)
In [5]:
df.columns
Out[5]: Index(['tweet_id', 'airline_sentiment', 'airline_sentiment_confidence',

'negativereason', 'negativereason_confidence', 'airline',
'airline_sentiment_gold', 'name', 'negativereason_gold',
'retweet_count', 'text', 'tweet_coord', 'tweet_created',
'tweet_location', 'user_timezone'],
dtype='object')
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14640 entries, 0 to 14639
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 tweet_id 14640 non-null int64
1 airline_sentiment 14640 non-null object
2 airline_sentiment_confidence 14640 non-null float64
3 negativereason 9178 non-null object
4 negativereason_confidence 10522 non-null float64
5 airline 14640 non-null object
6 airline_sentiment_gold 40 non-null object
7 name 14640 non-null object
8 negativereason_gold 32 non-null object
9 retweet_count 14640 non-null int64
10 text 14640 non-null object
11 tweet_coord 1019 non-null object
12 tweet_created 14640 non-null object
13 tweet_location 9907 non-null object
14 user_timezone 9820 non-null object
dtypes: float64(2), int64(2), object(11)
memory usage: 1.7+ MB
In [7]:
df.describe()
Out[7]: tweet_id airline_sentiment_confidence negativereason_confidence retweet_count
count 1.464000e+04 14640.000000 10522.000000 14640.000000
localhost:8888/nbconvert/html/pandas.ipynb?download=false 1/23
4/4/2021 pandas
tweet_id airline_sentiment_confidence negativereason_confidence retweet_count
mean 5.692184e+17 0.900169 0.638298 0.082650
std 7.791112e+14 0.162830 0.330440 0.745778
min 5.675883e+17 0.335000 0.000000 0.000000
25% 5.685592e+17 0.692300 0.360600 0.000000
50% 5.694779e+17 1.000000 0.670600 0.000000
75% 5.698905e+17 1.000000 1.000000 0.000000
max 5.703106e+17 1.000000 1.000000 44.000000
In [12]:
df
Out[12]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereas
0 570306133677760513 neutral 1.0000 NaN
1 570301130888122368 positive 0.3486 NaN
2 570301083672813571 neutral 0.6837 NaN
3 570301031407624196 negative 1.0000 Bad Flight
4 570300817074462722 negative 1.0000 Can't Tell
... ... ... ... ...
14635 569587686496825344 positive 0.3487 NaN
Customer
14636 569587371693355008 negative 1.0000
Service Issue
14637 569587242672398336 neutral 1.0000 NaN
4/4/2021 pandas
tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereas
Customer
14638 569587188687634433 negative 1.0000
Service Issue
14639 569587140490866689 neutral 0.6771 NaN
14640 rows × 15 columns
In [8]:
df.head()
Out[8]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_c
0 570306133677760513 neutral 1.0000 NaN
1 570301130888122368 positive 0.3486 NaN
2 570301083672813571 neutral 0.6837 NaN
In [9]:
df.head(10)
Out[9]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_c
0 570306133677760513 neutral 1.0000 NaN
1 570301130888122368 positive 0.3486 NaN
4/4/2021 pandas
tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_c
2 570301083672813571 neutral 0.6837 NaN
6 570300616901320704 positive 0.6745 NaN
7 570300248553349120 neutral 0.6340 NaN
8 570299953286942721 positive 0.6559 NaN
9 570295459631263746 positive 1.0000 NaN
In [15]:
df.head(60)
Out[15]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_
0 570306133677760513 neutral 1.0000 NaN
1 570301130888122368 positive 0.3486 NaN
2 570301083672813571 neutral 0.6837 NaN
4/4/2021 pandas
tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_
6 570300616901320704 positive 0.6745 NaN
7 570300248553349120 neutral 0.6340 NaN
8 570299953286942721 positive 0.6559 NaN
9 570295459631263746 positive 1.0000 NaN
10 570294189143031808 neutral 0.6769 NaN
11 570289724453216256 positive 1.0000 NaN
12 570289584061480960 positive 1.0000 NaN
13 570287408438120448 positive 0.6451 NaN
14 570285904809598977 positive 1.0000 NaN
15 570282469121007616 negative 0.6842 Late Flight
16 570277724385734656 positive 1.0000 NaN
18 570270684619923457 positive 1.0000 NaN
19 570267956648792064 positive 1.0000 NaN
4/4/2021 pandas
21 570264145116819457 positive 1.0000 NaN
22 570259420287868928 positive 1.0000 NaN
23 570258822297579520 neutral 1.0000 NaN
Customer
24 570256553502068736 negative 1.0000
Service Issue
Customer
25 570249102404923392 negative 1.0000
Service Issue
27 570217831557677057 neutral 0.6854 NaN
29 570124596180955136 neutral 0.6150 NaN
Flight Booking
30 570114021854212096 negative 1.0000
Problems
31 570094701371469825 neutral 1.0000 NaN
Customer
32 570088404156698625 negative 1.0000
Service Issue
Customer
33 570084582780899328 negative 1.0000
Service Issue
34 570076792993611776 positive 1.0000 NaN
35 570051991277342720 neutral 0.6207 NaN
36 570051381534396416 positive 1.0000 NaN
4/4/2021 pandas
37 570045393565691904 positive 1.0000 NaN
38 570038941497192448 neutral 0.6791 NaN
Customer
39 570035876845084672 negative 1.0000
Service Issue
40 570033593394667521 positive 0.6639 NaN
Flight Booking
41 570025482344898560 negative 0.6688
Problems
42 570016304284901379 neutral 1.0000 NaN
43 570015408788414464 neutral 0.6578 NaN
44 570013523650048002 neutral 1.0000 NaN
45 570012257549070337 positive 1.0000 NaN
46 570011341483843584 neutral 0.6799 NaN
47 570010571707256832 positive 1.0000 NaN
48 570010539499393025 neutral 1.0000 NaN
49 570009713447825408 neutral 0.6436 NaN
50 570009035455344640 neutral 0.6764 NaN
51 570006886012973056 positive 0.6570 NaN
4/4/2021 pandas
52 570004391731847169 neutral 1.0000 NaN
53 570001194900426752 neutral 0.7118 NaN
54 570000071644872704 neutral 1.0000 NaN
Flight Booking
55 569996412286582784 negative 0.6939
Problems
56 569996245462159361 positive 1.0000 NaN
57 569990222609412097 positive 0.6350 NaN
58 569990163209850881 neutral 0.7007 NaN
59 569989504431316993 neutral 1.0000 NaN
In [16]:
df.head(70)
Out[16]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_
0 570306133677760513 neutral 1.0000 NaN
1 570301130888122368 positive 0.3486 NaN
2 570301083672813571 neutral 0.6837 NaN
4/4/2021 pandas
... ... ... ... ...
65 569982307634794497 neutral 0.6814 NaN
Customer
66 569976620158578688 negative 1.0000
Service Issue
68 569972508499283968 positive 0.6922 NaN
69 569967019958730753 negative 1.0000 Lost Luggage
In [17]:
df.tail()
14635 569587686496825344 positive 0.3487 NaN
Customer
14636 569587371693355008 negative 1.0000
Service Issue
14637 569587242672398336 neutral 1.0000 NaN
Customer
14638 569587188687634433 negative 1.0000
Service Issue
4/4/2021 pandas
tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereas
14639 569587140490866689 neutral 0.6771 NaN
In [18]:
df.tail(10)
14630 569588473050611712 positive 1.0000 NaN
14632 569587813856841728 neutral 0.6760 NaN
14633 569587705937600512 negative 1.0000 Cancelled Flight
14635 569587686496825344 positive 0.3487 NaN
Customer
14636 569587371693355008 negative 1.0000
Service Issue
14637 569587242672398336 neutral 1.0000 NaN
Customer
14638 569587188687634433 negative 1.0000
Service Issue
14639 569587140490866689 neutral 0.6771 NaN
4/4/2021 pandas
In [20]:
df[100:110]
Out[20]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason
100 569894407001939968 neutral 1.0000 NaN
102 569891469210755074 neutral 1.0000 NaN
105 569887049446076416 positive 1.0000 NaN
Customer
106 569884551712886785 negative 1.0000
Service Issue
Flight Booking
107 569884407852437504 negative 1.0000
Problems
108 569881548515708928 neutral 0.6593 NaN
109 569873669700358144 positive 0.6823 NaN
In [10]:
df['tweet_id']
Out[10]: 0 570306133677760513
4/4/2021 pandas
1 570301130888122368
2 570301083672813571
3 570301031407624196
4 570300817074462722
...
14635 569587686496825344
14636 569587371693355008
14637 569587242672398336
14638 569587188687634433
14639 569587140490866689
Name: tweet_id, Length: 14640, dtype: int64
In [11]:
type(df['tweet_id'])
Out[11]: pandas.core.series.Series
In [12]:
type(df[['tweet_id']])
Out[12]: pandas.core.frame.DataFrame
In [14]:
df[['tweet_id', 'airline_sentiment']]
Out[14]: tweet_id airline_sentiment
0 570306133677760513 neutral
1 570301130888122368 positive
2 570301083672813571 neutral
3 570301031407624196 negative
4 570300817074462722 negative
... ... ...
14635 569587686496825344 positive
14636 569587371693355008 negative
14637 569587242672398336 neutral
14638 569587188687634433 negative
14639 569587140490866689 neutral
In [27]:
df[['negativereason','tweet_id', 'airline_sentiment']]
Out[27]: negativereason tweet_id airline_sentiment
0 NaN 570306133677760513 neutral
1 NaN 570301130888122368 positive
2 NaN 570301083672813571 neutral
4/4/2021 pandas
negativereason tweet_id airline_sentiment
3 Bad Flight 570301031407624196 negative
4 Can't Tell 570300817074462722 negative
... ... ... ...
14635 NaN 569587686496825344 positive
14636 Customer Service Issue 569587371693355008 negative
14637 NaN 569587242672398336 neutral
14638 Customer Service Issue 569587188687634433 negative
14639 NaN 569587140490866689 neutral
In [15]:
df2 = pd.read_csv('Suicides in India 2001-2012.csv')
In [16]:
df2
Out[16]: State Year Type_code Type Gender Age_group Total
A&N
0 2001 Causes Illness (Aids/STD) Female 0-14 0
Islands
A&N Bankruptcy or Sudden change in

1 2001 Causes Female 0-14 0
Islands Economic
A&N Cancellation/Non-Settlement of
Islands Marriage
A&N
3 2001 Causes Physical Abuse (Rape/Incest Etc.) Female 0-14 0
Islands
A&N
4 2001 Causes Dowry Dispute Female 0-14 0
Islands
... ... ... ... ... ... ... ...
West
237514 2012 Social_Status Seperated Male 0-100+ 149
Bengal
West
237515 2012 Social_Status Widowed/Widower Male 0-100+ 233
Bengal
West
237516 2012 Social_Status Married Male 0-100+ 5451
Bengal
West
237517 2012 Social_Status Divorcee Male 0-100+ 189
Bengal
West
237518 2012 Social_Status Never Married Male 0-100+ 2658
Bengal
4/4/2021 pandas
In [17]: df2.info()
--- ------ -------------- -----
0 State 237519 non-null object
1 Year 237519 non-null int64
2 Type_code 237519 non-null object
3 Type 237519 non-null object
4 Gender 237519 non-null object
5 Age_group 237519 non-null object
6 Total 237519 non-null int64
dtypes: int64(2), object(5)
In [18]:
df2.shape
Out[18]: (237519, 7)
In [19]:
df2['Year'].unique()
Out[19]: array([2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
2012], dtype=int64)
In [20]:
df2['Year'].value_counts()
Out[20]: 2011 19806

2005 19803
2012 19799
2008 19797
2001 19797
2007 19794
2010 19792
2002 19790
2009 19786
2006 19786
2003 19786
2004 19783
Name: Year, dtype: int64
In [37]:
df2['Type'].value_counts()
Out[37]: Others (Please Specify) 7263

Illness (Aids/STD) 4200
Professional/Career Problem 4200
Dowry Dispute 4200
Love Affairs 4200
...
Seperated 912
No Education 912
Not having Children (Barrenness/Impotency 350
Bankruptcy or Sudden change in Economic Status 350
By Other means 350
Name: Type, Length: 69, dtype: int64
In [38]:
df2['Gender'].value_counts()
4/4/2021 pandas
Out[38]: Male 118879

Female 118640
Name: Gender, dtype: int64
In [43]:
df2['State'].value_counts()
Out[43]: Maharashtra 6792

Madhya Pradesh 6792
Karnataka 6792
Rajasthan 6791
Andhra Pradesh 6791
Odisha 6791
Haryana 6790
Chhattisgarh 6790
Bihar 6790
Kerala 6788
Uttar Pradesh 6787
Tamil Nadu 6786
Gujarat 6786
Assam 6786
Jharkhand 6785
Delhi (Ut) 6782
Tripura 6782
West Bengal 6780
Punjab 6779
Himachal Pradesh 6774
Jammu & Kashmir 6761
Goa 6759
Uttarakhand 6758
Sikkim 6742
Mizoram 6737
Meghalaya 6733
Puducherry 6730
Chandigarh 6717
A & N Islands 6712
Daman & Diu 6710
Arunachal Pradesh 6707
Nagaland 6705
D & N Haveli 6704
Manipur 6700
Lakshadweep 6674
Total (States) 312
Total (All India) 312
Total (Uts) 312
Name: State, dtype: int64
In [21]:
df2[df2['State'] == 'Karnataka']
108144 Karnataka 2001 Causes Insanity/Mental Illness Female 0-14 11
108145 Karnataka 2001 Causes Causes Not known Female 0-14 42
108146 Karnataka 2001 Causes Property Dispute Female 0-14 0
108147 Karnataka 2001 Causes Drug Abuse/Addiction Female 0-14 5
108148 Karnataka 2001 Causes Cancer Female 0-14 0
... ... ... ... ... ... ... ...
4/4/2021 pandas
State Year Type_code Type Gender Age_group Total
114931 Karnataka 2012 Social_Status Seperated Male 0-100+ 215
114932 Karnataka 2012 Social_Status Married Male 0-100+ 6216
114933 Karnataka 2012 Social_Status Never Married Male 0-100+ 1968
114934 Karnataka 2012 Social_Status Widowed/Widower Male 0-100+ 180
114935 Karnataka 2012 Social_Status Divorcee Male 0-100+ 17
In [23]:
df2[(df2['State'] == 'Karnataka') | (df2['State'] == 'Punjab')]
... ... ... ... ... ... ... ...
189152 Punjab 2012 Social_Status Seperated Male 0-100+ 10
189153 Punjab 2012 Social_Status Divorcee Male 0-100+ 1
189154 Punjab 2012 Social_Status Widowed/Widower Male 0-100+ 4
189155 Punjab 2012 Social_Status Never Married Male 0-100+ 299
189156 Punjab 2012 Social_Status Married Male 0-100+ 470
In [25]:
df2[(df2['State'] == 'Bihar') & (df2['Gender'] == 'Male') & (df2['Type'] == 'Cancer')].
Out[25]: (60, 7)
In [45]:
karnataka_df = df2[df2['State'] == 'Karnataka']
In [46]:
karnataka_df
4/4/2021 pandas
... ... ... ... ... ... ... ...
In [47]:
karnataka_df.to_excel('karnataka_df.xlsx')
In [50]:
karnataka_df[karnataka_df['Year'] == 2005]
Bankruptcy or Sudden change in

110408 Karnataka 2005 Causes Female 0-14 0
Economic
110409 Karnataka 2005 Causes Other Causes (Please Specity) Female 0-14 38
110410 Karnataka 2005 Causes Paralysis Female 0-14 0
110411 Karnataka 2005 Causes Poverty Female 0-14 1
110412 Karnataka 2005 Causes Death of Dear Person Female 0-14 0
... ... ... ... ... ... ... ...
In [51]:
df2[(df2['State'] == 'Kerala') & (df2['Year'] == 2005)]
117200 Kerala 2005 Causes Cancer Female 0-14 0
Bankruptcy or Sudden change in

117201 Kerala 2005 Causes Female 0-14 0
Economic
4/4/2021 pandas
Cancellation/Non-Settlement of
117202 Kerala 2005 Causes Female 0-14 0
Marriage
117203 Kerala 2005 Causes Illegitimate Pregnancy Female 0-14 0
117204 Kerala 2005 Causes Family Problems Female 0-14 13
... ... ... ... ... ... ... ...
117761 Kerala 2005 Social_Status Widowed/Widower Male 0-100+ 128
117762 Kerala 2005 Social_Status Married Male 0-100+ 5388
117763 Kerala 2005 Social_Status Divorcee Male 0-100+ 16
117764 Kerala 2005 Social_Status Seperated Male 0-100+ 60
117765 Kerala 2005 Social_Status Never Married Male 0-100+ 1238
In [52]:
df2.info()
--- ------ -------------- -----
0 State 237519 non-null object
1 Year 237519 non-null int64
2 Type_code 237519 non-null object
3 Type 237519 non-null object
4 Gender 237519 non-null object
5 Age_group 237519 non-null object
6 Total 237519 non-null int64
dtypes: int64(2), object(5)
In [33]:
df2[(df2['State']=='Gujarat') & (df2['Year'] >= 2003) & (df2['Year'] <= 2005 ) & (df2[
75403 Gujarat 2003 Causes Cancer Female 0-14 1
75602 Gujarat 2003 Causes Cancer Female 60+ 2
4/4/2021 pandas
In [40]:
df3 = pd.read_csv('sucide df.csv')
df3.shape
Out[40]: (237530, 7)
In [36]:
df3
A&N
Islands
A&N Bankruptcy or Sudden change in

Islands Economic
A&N Cancellation/Non-Settlement of
Islands Marriage
A&N
3 2001 Causes Physical Abuse (Rape/Incest Etc.) Female 0-14 0
Islands
A&N
Islands
... ... ... ... ... ... ... ...
West
237525 2012 Social_Status Seperated Male 0-100+ 149
Bengal
West
237526 2012 Social_Status Widowed/Widower Male 0-100+ 233
Bengal
West
237527 2012 Social_Status Married Male 0-100+ 5451
Bengal
West
237528 2012 Social_Status Divorcee Male 0-100+ 189
Bengal
West
237529 2012 Social_Status Never Married Male 0-100+ 2658
Bengal
In [37]:
df3[df3.duplicated()]
4/4/2021 pandas
237519 West Bengal 2012 Professional_Profile Service (Government) Male 60+ 0
237520 West Bengal 2012 Social_Status Seperated Female 0-100+ 200
237521 West Bengal 2012 Social_Status Married Female 0-100+ 3927
237522 West Bengal 2012 Social_Status Divorcee Female 0-100+ 182
237523 West Bengal 2012 Social_Status Widowed/Widower Female 0-100+ 455
237524 West Bengal 2012 Social_Status Never Married Female 0-100+ 1513
237525 West Bengal 2012 Social_Status Seperated Male 0-100+ 149
237526 West Bengal 2012 Social_Status Widowed/Widower Male 0-100+ 233
237527 West Bengal 2012 Social_Status Married Male 0-100+ 5451
237528 West Bengal 2012 Social_Status Divorcee Male 0-100+ 189
237529 West Bengal 2012 Social_Status Never Married Male 0-100+ 2658
In [41]:
df3 = df3.drop_duplicates()
In [42]:
df3[df3.duplicated()]
In [43]:
df4 = df3.drop_duplicates(subset=['State'])
In [44]:
df4
A&N
Islands
Andhra
6712 2001 Causes Paralysis Female 0-14 0
Pradesh
Arunachal Cancellation/Non-Settlement
13503 2001 Causes Female 0-14 0
Pradesh of Marriage
20210 Assam 2001 Causes Drug Abuse/Addiction Female 0-14 0
26996 Bihar 2001 Causes Drug Abuse/Addiction Female 0-14 0
33786 Chandigarh 2001 Causes Drug Abuse/Addiction Female 0-14 0
40503 Chhattisgarh 2001 Causes Causes Not known Female 0-14 13
47293 D & N Haveli 2001 Causes Insanity/Mental Illness Female 0-14 0
Daman &
53997 2001 Causes Illegitimate Pregnancy Female 0-14 0
Diu
4/4/2021 pandas
60707 Delhi (Ut) 2001 Causes Professional/Career Problem Female 0-14 0
Cancellation/Non-Settlement
67489 Goa 2001 Causes Female 0-14 0
of Marriage
74248 Gujarat 2001 Causes Illegitimate Pregnancy Female 0-14 0
81034 Haryana 2001 Causes Death of Dear Person Female 0-14 0
Himachal
Pradesh
Jammu &
Kashmir
101359 Jharkhand 2001 Causes Failure in Examination Female 0-14 1
114936 Kerala 2001 Causes Love Affairs Female 0-14 0
121724 Lakshadweep 2001 Causes Drug Abuse/Addiction Female 0-14 0
Madhya
128398 2001 Causes Insanity/Mental Illness Female 0-14 2
Pradesh
Ideological Causes/Hero
135190 Maharashtra 2001 Causes Female 0-14 0
Worshipping
141982 Manipur 2001 Causes Paralysis Female 0-14 0
148682 Meghalaya 2001 Causes Death of Dear Person Female 0-14 0
155415 Mizoram 2001 Causes Death of Dear Person Female 0-14 0
162152 Nagaland 2001 Causes Fall in Social Reputation Female 0-14 0
Bankruptcy or Sudden
168857 Odisha 2001 Causes Female 0-14 0
change in Economic
175648 Puducherry 2001 Causes Illegitimate Pregnancy Female 0-14 0
182378 Punjab 2001 Causes Dowry Dispute Female 0-14 0
189157 Rajasthan 2001 Causes Causes Not known Female 0-14 12
195948 Sikkim 2001 Causes Failure in Examination Female 0-14 0
202690 Tamil Nadu 2001 Causes Death of Dear Person Female 0-14 2
Hr.
Total (All
209476 2001 Education_Status Secondary/Intermediate/Pre- Female 0-100+ 2391
India)
Universit
209788 Total (States) 2001 Education_Status Primary Female 0-100+ 11118
210100 Total (Uts) 2001 Education_Status No Education Female 0-100+ 148
210412 Tripura 2001 Causes Poverty Female 0-14 1
Uttar Other Causes (Please

217194 2001 Causes Female 0-14 13
Pradesh Specity)
223981 Uttarakhand 2001 Causes Unemployment Female 0-14 0
4/4/2021 pandas
Physical Abuse (Rape/Incest

230739 West Bengal 2001 Causes Female 0-14 1
Etc.)
In [45]:
df3.describe()
Out[45]: Year Total
count 237519.000000 237519.000000
mean 2006.500448 55.034477
std 3.452240 792.749038
min 2001.000000 0.000000
25% 2004.000000 0.000000
50% 2007.000000 0.000000
75% 2010.000000 6.000000
max 2012.000000 63343.000000
In [46]:
import matplotlib.pyplot as plt
In [48]:
churn_df = pd.read_csv('Churn.csv')
In [49]:
churn_df
Out[49]: RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance
0 1 15634602 Hargrave 619 France Female 42 2 0.00
1 2 15647311 Hill 608 Spain Female 41 1 83807.86
2 3 15619304 Onio 502 France Female 42 8 159660.80
3 4 15701354 Boni 699 France Female 39 1 0.00
4 5 15737888 Mitchell 850 Spain Female 43 2 125510.82
... ... ... ... ... ... ... ... ... ...
9995 9996 15606229 Obijiaku 771 France Male 39 5 0.00
9996 9997 15569892 Johnstone 516 France Male 35 10 57369.61
9997 9998 15584532 Liu 709 France Female 36 7 0.00
9998 9999 15682355 Sabbatini 772 Germany Male 42 3 75075.31
9999 10000 15628319 Walker 792 France Female 28 4 130142.79
4/4/2021 pandas
In [50]:
churn_df.describe()
Out[50]: RowNumber CustomerId CreditScore Age Tenure Balance NumOfPro
count 10000.00000 1.000000e+04 10000.000000 10000.000000 10000.000000 10000.000000 10000.00
mean 5000.50000 1.569094e+07 650.528800 38.921800 5.012800 76485.889288 1.53
std 2886.89568 7.193619e+04 96.653299 10.487806 2.892174 62397.405202 0.58
min 1.00000 1.556570e+07 350.000000 18.000000 0.000000 0.000000 1.00
25% 2500.75000 1.562853e+07 584.000000 32.000000 3.000000 0.000000 1.00
50% 5000.50000 1.569074e+07 652.000000 37.000000 5.000000 97198.540000 1.00
75% 7500.25000 1.575323e+07 718.000000 44.000000 7.000000 127644.240000 2.00
max 10000.00000 1.581569e+07 850.000000 92.000000 10.000000 250898.090000 4.00
In [51]:
churn_df.boxplot(column=['Age'])
Out[51]: <matplotlib.axes._subplots.AxesSubplot at 0x1ab45349848>
In [53]:
churn_df = churn_df[churn_df['Age'] < 62]
In [ ]:

Pandas

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Pandas

Uploaded by

Copyright:

Available Formats

4/4/2021 pandas

NameError: name 'pd' is not defined

Out[4]: (14640, 15)

Out[5]: Index(['tweet_id', 'airline_sentiment', 'airline_sentiment_confidence',

Out[7]: tweet_id airline_sentiment_confidence negativereason_confidence retweet_count

count 1.464000e+04 14640.000000 10522.000000 14640.000000

tweet_id airline_sentiment_confidence negativereason_confidence retweet_count

mean 5.692184e+17 0.900169 0.638298 0.082650

std 7.791112e+14 0.162830 0.330440 0.745778

min 5.675883e+17 0.335000 0.000000 0.000000

25% 5.685592e+17 0.692300 0.360600 0.000000

50% 5.694779e+17 1.000000 0.670600 0.000000

75% 5.698905e+17 1.000000 1.000000 0.000000

max 5.703106e+17 1.000000 1.000000 44.000000

Out[12]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereas

0 570306133677760513 neutral 1.0000 NaN

1 570301130888122368 positive 0.3486 NaN

2 570301083672813571 neutral 0.6837 NaN

3 570301031407624196 negative 1.0000 Bad Flight

4 570300817074462722 negative 1.0000 Can't Tell

... ... ... ... ...

14635 569587686496825344 positive 0.3487 NaN

14637 569587242672398336 neutral 1.0000 NaN

tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereas

14639 569587140490866689 neutral 0.6771 NaN

14640 rows × 15 columns

Out[8]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_c

0 570306133677760513 neutral 1.0000 NaN

1 570301130888122368 positive 0.3486 NaN

2 570301083672813571 neutral 0.6837 NaN

3 570301031407624196 negative 1.0000 Bad Flight

4 570300817074462722 negative 1.0000 Can't Tell

Out[9]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_c

0 570306133677760513 neutral 1.0000 NaN

1 570301130888122368 positive 0.3486 NaN

tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_c

2 570301083672813571 neutral 0.6837 NaN

3 570301031407624196 negative 1.0000 Bad Flight

4 570300817074462722 negative 1.0000 Can't Tell

5 570300767074181121 negative 1.0000 Can't Tell

6 570300616901320704 positive 0.6745 NaN

7 570300248553349120 neutral 0.6340 NaN

8 570299953286942721 positive 0.6559 NaN

9 570295459631263746 positive 1.0000 NaN

Out[15]: tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_

0 570306133677760513 neutral 1.0000 NaN

1 570301130888122368 positive 0.3486 NaN

2 570301083672813571 neutral 0.6837 NaN

3 570301031407624196 negative 1.0000 Bad Flight

4 570300817074462722 negative 1.0000 Can't Tell

tweet_id airline_sentiment airline_sentiment_confidence negativereason negativereason_

5 570300767074181121 negative 1.0000 Can't Tell

6 570300616901320704 positive 0.6745 NaN

7 570300248553349120 neutral 0.6340 NaN

8 570299953286942721 positive 0.6559 NaN

9 570295459631263746 positive 1.0000 NaN

10 570294189143031808 neutral 0.6769 NaN

11 570289724453216256 positive 1.0000 NaN

12 570289584061480960 positive 1.0000 NaN

13 570287408438120448 positive 0.6451 NaN

14 570285904809598977 positive 1.0000 NaN

15 570282469121007616 negative 0.6842 Late Flight

16 570277724385734656 positive 1.0000 NaN