Professional Documents
Culture Documents
East West Airlines Output
East West Airlines Output
import pandas as pd
import numpy as np
East.dtypes
Out[5]:
ID# int64
Balance int64
Qual_miles int64
cc1_miles int64
cc2_miles int64
cc3_miles int64
Bonus_miles int64
Bonus_trans int64
Flight_miles_12mo int64
Flight_trans_12 int64
Days_since_enroll int64
Award? int64
dtype: object
East.info()
<class 'pandas.core.frame.DataFrame'>
dtypes: int64(12)
warnings.warn(
Out[9]: <AxesSubplot:xlabel='Balance'>
# Cap Balance outliers with the 1.5 * IQR rule, then re-plot to check the effect.
winsor = Winsorizer(capping_method='iqr',  # choose IQR rule boundaries or gaussian for mean and std
                    fold=1.5,
                    variables=['Balance'])
# Stored under its own name: East_t is assigned again further down for
# Bonus_trans, which would otherwise silently replace this Balance result.
East_bal = winsor.fit_transform(East[['Balance']])
# The vector is passed as x=...; seaborn warns that positional data vectors
# become an error or are misinterpreted from version 0.12.
sns.boxplot(x=East_bal.Balance)
warnings.warn(
Out[20]: <AxesSubplot:xlabel='Balance'>

# Box plot of the raw Qual_miles column. The vector is passed by keyword because
# seaborn warns that positional data vectors stop being accepted in 0.12.
sns.boxplot(x=East.Qual_miles)
warnings.warn(
Out[22]: <AxesSubplot:xlabel='Qual_miles'>

# Apply Winsorizer to Qual_miles
winsor = Winsorizer(capping_method='iqr',  # choose IQR rule boundaries or gaussian for mean and std
                    # fold is stated explicitly so this cell uses the same 1.5 * IQR rule as the
                    # other Winsorizer cells instead of the installed library's default.
                    fold=1.5,
                    variables=['Qual_miles'])
East_Q = winsor.fit_transform(East[['Qual_miles']])
# Keyword x= avoids seaborn's FutureWarning about positional data vectors.
sns.boxplot(x=East_Q.Qual_miles)
warnings.warn(
Out[31]: <AxesSubplot:xlabel='Qual_miles'>
# Box plot of the raw cc1_miles column; x= is passed by keyword to satisfy
# seaborn's FutureWarning (positional vectors are rejected from 0.12).
sns.boxplot(x=East.cc1_miles)
C:\Users\Public\conda\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following
variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and
passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[33]: <AxesSubplot:xlabel='cc1_miles'>
# Box plot of the raw cc2_miles column; x= is passed by keyword to satisfy
# seaborn's FutureWarning (positional vectors are rejected from 0.12).
sns.boxplot(x=East.cc2_miles)
warnings.warn(
Out[36]: <AxesSubplot:xlabel='cc2_miles'>

# Apply Winsorizer to cc2_miles
winsor = Winsorizer(capping_method='iqr',  # choose IQR rule boundaries or gaussian for mean and std
                    # fold is stated explicitly so this cell uses the same 1.5 * IQR rule as the
                    # other Winsorizer cells instead of the installed library's default.
                    fold=1.5,
                    variables=['cc2_miles'])
East_c = winsor.fit_transform(East[['cc2_miles']])
# Keyword x= avoids seaborn's FutureWarning about positional data vectors.
sns.boxplot(x=East_c.cc2_miles)
warnings.warn(
Out[45]: <AxesSubplot:xlabel='cc2_miles'>
# Box plot of the raw cc3_miles column; x= is passed by keyword to satisfy
# seaborn's FutureWarning (positional vectors are rejected from 0.12).
sns.boxplot(x=East.cc3_miles)
C:\Users\Public\conda\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following
variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and
passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[47]: <AxesSubplot:xlabel='cc3_miles'>
# Apply Winsorizer to cc3_miles (1.5 * IQR rule)
from feature_engine.outliers import Winsorizer
winsor = Winsorizer(capping_method='iqr',  # choose IQR rule boundaries or gaussian for mean and std
                    fold=1.5,
                    variables=['cc3_miles'])
East_c3 = winsor.fit_transform(East[['cc3_miles']])
# Keyword x= avoids seaborn's FutureWarning about positional data vectors.
sns.boxplot(x=East_c3.cc3_miles)
warnings.warn(
Out[56]: <AxesSubplot:xlabel='cc3_miles'>

# Box plot of the raw Bonus_miles column; x= is passed by keyword to satisfy
# seaborn's FutureWarning (positional vectors are rejected from 0.12).
sns.boxplot(x=East.Bonus_miles)
warnings.warn(
Out[58]: <AxesSubplot:xlabel='Bonus_miles'>

# Apply Winsorizer to Bonus_miles (1.5 * IQR rule).
# Only one Winsorizer is built: a second constructor call without fold used to sit
# directly above this one and was overwritten before ever being fitted.
winsor = Winsorizer(capping_method='iqr',  # choose IQR rule boundaries or gaussian for mean and std
                    fold=1.5,
                    variables=['Bonus_miles'])
East_B = winsor.fit_transform(East[['Bonus_miles']])
# Keyword x= avoids seaborn's FutureWarning about positional data vectors.
sns.boxplot(x=East_B.Bonus_miles)
warnings.warn(
Out[68]: <AxesSubplot:xlabel='Bonus_miles'>

# Box plot of the raw Bonus_trans column; x= is passed by keyword to satisfy
# seaborn's FutureWarning (positional vectors are rejected from 0.12).
sns.boxplot(x=East.Bonus_trans)
warnings.warn(
Out[70]: <AxesSubplot:xlabel='Bonus_trans'>

# Apply Winsorizer to Bonus_trans
winsor = Winsorizer(capping_method='iqr',  # choose IQR rule boundaries or gaussian for mean and std
                    # fold is stated explicitly so this cell uses the same 1.5 * IQR rule as the
                    # other Winsorizer cells instead of the installed library's default.
                    fold=1.5,
                    variables=['Bonus_trans'])
# East_t is read again by the Bonus_trans histogram cell, so the name is kept.
East_t = winsor.fit_transform(East[['Bonus_trans']])
# Keyword x= avoids seaborn's FutureWarning about positional data vectors.
sns.boxplot(x=East_t.Bonus_trans)
warnings.warn(
Out[79]: <AxesSubplot:xlabel='Bonus_trans'>
# Box plot of the raw Flight_miles_12mo column; x= is passed by keyword to satisfy
# seaborn's FutureWarning (positional vectors are rejected from 0.12).
sns.boxplot(x=East.Flight_miles_12mo)
C:\Users\Public\conda\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following
variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and
passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[81]: <AxesSubplot:xlabel='Flight_miles_12mo'>
# Apply Winsorizer to Flight_miles_12mo (1.5 * IQR rule)
from feature_engine.outliers import Winsorizer
winsor = Winsorizer(capping_method='iqr',  # choose IQR rule boundaries or gaussian for mean and std
                    fold=1.5,
                    variables=['Flight_miles_12mo'])
East_F = winsor.fit_transform(East[['Flight_miles_12mo']])
# Keyword x= avoids seaborn's FutureWarning about positional data vectors.
sns.boxplot(x=East_F.Flight_miles_12mo)
warnings.warn(
Out[90]: <AxesSubplot:xlabel='Flight_miles_12mo'>

# Box plot of the raw Flight_trans_12 column; x= is passed by keyword to satisfy
# seaborn's FutureWarning (positional vectors are rejected from 0.12).
sns.boxplot(x=East.Flight_trans_12)
warnings.warn(
Out[92]: <AxesSubplot:xlabel='Flight_trans_12'>

# Apply Winsorizer to Flight_trans_12
winsor = Winsorizer(capping_method='iqr',  # choose IQR rule boundaries or gaussian for mean and std
                    # fold is stated explicitly so this cell uses the same 1.5 * IQR rule as the
                    # other Winsorizer cells instead of the installed library's default.
                    fold=1.5,
                    variables=['Flight_trans_12'])
East_F2 = winsor.fit_transform(East[['Flight_trans_12']])
# Keyword x= avoids seaborn's FutureWarning about positional data vectors.
sns.boxplot(x=East_F2.Flight_trans_12)
warnings.warn(
Out[101]: <AxesSubplot:xlabel='Flight_trans_12'>
# Box plot of the raw Days_since_enroll column; x= is passed by keyword to satisfy
# seaborn's FutureWarning (positional vectors are rejected from 0.12).
sns.boxplot(x=East.Days_since_enroll)
C:\Users\Public\conda\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following
variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and
passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[103]: <AxesSubplot:xlabel='Days_since_enroll'>
East.isna().sum()
Out[106]:
ID# 0
Balance 0
Qual_miles 0
cc1_miles 0
cc2_miles 0
cc3_miles 0
Bonus_miles 0
Bonus_trans 0
Flight_miles_12mo 0
Flight_trans_12 0
Days_since_enroll 0
Award? 0
dtype: int64
# Plot a histogram of Bonus_miles taken from East_B, the frame returned by the
# Winsorizer fitted on that column.
plt.hist(East_B.Bonus_miles)
Out[108]:
346.]),
# Histogram of Bonus_trans from East_t (the Winsorizer output for Bonus_trans).
plt.hist(East_t.Bonus_trans)
Out[109]:
101.]),
array([ 0. , 3.8, 7.6, 11.4, 15.2, 19. , 22.8, 26.6, 30.4, 34.2, 38. ]),
# Histogram of Flight_miles_12mo from East_F (the Winsorizer output for that column).
plt.hist(East_F.Flight_miles_12mo)
Out[110]:
617.]),
# Histogram of Flight_trans_12 from East_F2 (the Winsorizer output for that column).
plt.hist(East_F2.Flight_trans_12)
Out[111]:
565.]),
array([0. , 0.25, 0.5 , 0.75, 1. , 1.25, 1.5 , 1.75, 2. , 2.25, 2.5 ]),
# Normalization function
def norm_func(i):
    """Min-max scale ``i`` into the range [0, 1].

    Parameters
    ----------
    i : pandas.DataFrame or pandas.Series
        Numeric data. A DataFrame is scaled column by column, because
        ``min``/``max`` reduce per column.

    Returns
    -------
    Same kind of object as ``i``, holding ``(i - min) / (max - min)``.
    A constant column or series (``max == min``) is returned as all 0.0:
    the plain formula would give 0/0 = NaN there, and NaNs would propagate
    into whatever consumes the scaled data.
    """
    lo = i.min()
    span = i.max() - lo
    # Swap a zero range for 1 so constant inputs divide cleanly to 0.0.
    if isinstance(span, pd.Series):
        span = span.replace(0, 1)
    elif span == 0:
        span = 1
    return (i - lo) / span
df_norm.describe()
Out[117]:
Qual_miles cc1_miles ... Days_since_enroll Award?
[8 rows x 10 columns]
# Dendrogram
# NOTE(review): `z` is not defined in the visible part of the notebook and the
# argument list looks truncated; presumably `z` is the linkage matrix. Confirm.
dendrogram(z,
)
plt.show()
# Display the labels held by h_complete (the object is created outside the visible cells).
h_complete.labels_
# Wrap the labels in a Series; presumably this becomes the `clust` column used
# by the group summaries further down. TODO confirm.
cluster_labels = pd.Series(h_complete.labels_)
Out[128]:
[5 rows x 12 columns]
# Mean of every column from position 1 onward (position 0 is skipped), grouped by East1.clust.
East1.iloc[:, 1:].groupby(East1.clust).mean()
Out[130]:
clust ...
[3 rows x 11 columns]
# Standard deviation of every column from position 2 onward (positions 0 and 1 are
# skipped), grouped by East1.clust.
East1.iloc[:, 2:].groupby(East1.clust).std()
Out[131]:
clust ...
[3 rows x 10 columns]
import os
os.getcwd()
# In Cluster 1, flying activity, earning and use of frequent flyer rewards, and airline credit card
# usage are high.
# In Cluster 2, flying activity, earning and use of frequent flyer rewards, and airline credit card
# usage are medium.
# In Cluster 0, flying activity, earning and use of frequent flyer rewards, and airline credit card
# usage are low.