Download as pdf or txt
Download as pdf or txt
You are on page 1 of 30

5.

Data Visualization

June 12, 2020

0.0.1 Types of Graphs


line plot: continuous data distribution / useful for time series data
histogram: single-variable continuous data distribution
boxplot: single-variable continuous data distribution
matrix plot or heatmap: multi-variable continuous data density distribution
scatter plot: to seek a relationship between two variables
bar plot or count plot: to see proportions of categorical data, distribution of a single
categorical variable
pie chart: same as bar chart
Quantitative
• line
• histogram
• box
• scatter
• matrix plot (heatmap)
Categorical
• bar
• pie
[1]: import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

[11]: plt.rcParams['figure.figsize'] = (10, 5)


plt.rcParams['axes.labelcolor'] = 'green'
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['xtick.color'] = 'red'
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.color'] = 'red'

1
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['figure.dpi'] = 300

0.1 Line Plot


plt.plot() -> line plot
sns.lineplot() -> line plot
[19]: data1 = np.random.normal(140, 10, 100)
data2 = np.random.normal(130, 20, 100)

[21]: plt.plot(data1, 'c', label='group-1') # Time Series


plt.plot(data2, color='gold', label='group-2', alpha=0.5)

plt.show()

[22]: data = np.random.randint(10, 50, 100).reshape(20, 5)

[26]: plt.plot(data)#, colors=['gold', 'cyan', 'silver', 'orange', '#123456'])


plt.show()

2
0.2 Histogram
[27]: tip = sns.load_dataset('tips')

[28]: tip.head()

[28]: total_bill tip sex smoker day time size


0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4

[34]: plt.plot(tip['total_bill'], 'c')


plt.ylabel('Total Bill')
plt.show()

3
[33]: sns.lineplot(x=range(tip.shape[0]), y='total_bill', data=tip, color='cyan')
plt.show()

[39]: plt.hist(tip['total_bill'], bins=30, color='w', ec='c', histtype='step')


plt.show()

4
[42]: India = np.random.normal(140, 10, 100)
USA = np.random.normal(130, 20, 100)

[51]: plt.hist(India, color='white', ec='c', bins=30, label='India', histtype='step')


plt.hist(USA, color='white', ec='gold', bins=30, label='USA', histtype='step',␣
,→alpha=0.5)

plt.ylabel("Height")
plt.legend()
plt.show()

5
[159]: fig, ax = plt.subplots()
sns.distplot(tip['total_bill'], ax=ax, bins=30)
plt.show()

[163]: fig, ax = plt.subplots()


sns.distplot(tip['total_bill'], ax=ax, bins=30, kde=False,
hist_kws={ 'ec':'gold', 'histtype':'step'})
plt.show()

6
[166]: fig, ax = plt.subplots()
sns.distplot(tip['total_bill'], ax=ax, bins=30, kde=False,
hist_kws={ 'ec':'silver', 'color':'gold'}, rug=True,
rug_kws={'color':'cyan'})
plt.show()

[169]: fig, ax = plt.subplots()


sns.distplot(tip['total_bill'], ax=ax, bins=30,
hist_kws={ 'ec':'silver', 'color':'gold'}, rug=True,
rug_kws={'color':'cyan','lw':2},
kde_kws={'color':'r', 'lw':2, 'ls':'--'}
)
plt.show()

7
[59]: plt.boxplot(tip['total_bill'], showmeans=True,
showfliers=False, showcaps=False, showbox=False)
plt.show()

[62]: plt.boxplot(tip['total_bill'], showmeans=True,


flierprops={'markeredgecolor':'r'})
plt.show()

8
[70]: plt.boxplot(tip['total_bill'], showmeans=True,
flierprops={'markeredgecolor':'r', 'markerfacecolor': 'g', 'markersize':
,→10})

plt.show()

[74]: plt.boxplot(tip['total_bill'], showmeans=True,


flierprops={'markeredgecolor':'r'},
capprops={'color':'gold', 'linewidth': 3})
plt.show()

9
[77]: plt.boxplot(tip['total_bill'], showmeans=True,
flierprops={'markeredgecolor':'r'},
capprops={'color':'gold', 'linewidth': 3},
whiskerprops={'color': 'cyan', 'lw':2})
plt.show()

[83]: plt.boxplot(tip['total_bill'], showmeans=True,


flierprops={'markeredgecolor':'r'},
capprops={'color':'gold', 'linewidth': 3},

10
whiskerprops={'color': 'cyan', 'lw':2},
notch=True,
boxprops={'color': 'y'})
plt.show()

[89]: plt.boxplot(tip['total_bill'], showmeans=True,patch_artist=True,


flierprops={'markeredgecolor':'r'},
capprops={'color':'gold', 'linewidth': 3},
whiskerprops={'color': 'cyan', 'lw':2},
notch=True,
boxprops={'color': 'y'})
plt.show()

11
[90]: plt.boxplot(tip['total_bill'], showmeans=True,patch_artist=True,
flierprops={'markeredgecolor':'r'},
capprops={'color':'gold', 'linewidth': 3},
whiskerprops={'color': 'cyan', 'lw':2},
notch=True,
boxprops={'color': 'y'},
medianprops={'color':'white', 'lw': 3}
)
plt.show()

12
[119]: plt.boxplot(tip['total_bill'], showmeans=True,patch_artist=True,
flierprops={'markeredgecolor':'r'},
capprops={'color':'gold', 'linewidth': 3},
whiskerprops={'color': 'cyan', 'lw':2},
notch=True,
boxprops={'color': 'y'},
medianprops={'color':'white', 'lw': 3, 'ls':'--'},
meanprops={'markeredgecolor': 'white', 'marker': 'D', 'markerfacecolor':
,→ 'red',

'markersize':10}
)
plt.show()

[116]: df = pd.DataFrame({ 'India':data1, 'USA':data2})


df.head()

[116]: India USA


0 139.519480 135.887954
1 133.789628 129.095135
2 152.413057 130.037377
3 129.712683 125.216627
4 127.921550 142.761354

[120]: plt.boxplot([df['India'], df['USA']])


plt.xticks([1, 2], ['India', 'USA'])
plt.ylabel('Height')
plt.xlabel('Country')

13
plt.legend()
plt.show()

No handles with labels found to put in legend.

[156]: fig, ax = plt.subplots()


p = ax.boxplot([df['India'], df['USA']], showmeans=True, patch_artist=True,␣
,→notch=True)

ax.set_xticks([1, 2])
ax.set_xticklabels(['India', 'USA'])
ax.set_ylabel('Height')
ax.set_xlabel('Country')
p['boxes'][0].set_color('blue')
p['boxes'][1].set_color('red')
ax.plot([], [], 'b-', label='India', lw=10)
ax.plot([], [], 'r-', label='USA', lw=10)
ax.legend(loc=10)
plt.show()

14
[173]: male = tip[tip['sex'] == 'Male']['total_bill']
female = tip[tip['sex']=='Female']['total_bill']

[174]: male.head()

[174]: 1 10.34
2 21.01
3 23.68
5 25.29
6 8.77
Name: total_bill, dtype: float64

[175]: female.head()

[175]: 0 16.99
4 24.59
11 35.26
14 14.83
16 10.33
Name: total_bill, dtype: float64

[178]: fig, ax = plt.subplots()


p = ax.boxplot([male, female], showmeans=True, patch_artist=True, notch=True)
ax.set_xticks([1, 2])
ax.set_xticklabels(['Male', 'Female'])
ax.set_ylabel('Height')
ax.set_xlabel('Country')
p['boxes'][0].set_color('blue')

15
p['boxes'][1].set_color('red')
ax.plot([], [], 'b-', label='Male', lw=10)
ax.plot([], [], 'r-', label='Female', lw=10)
ax.legend(loc=10)
plt.show()

[180]: sns.boxplot('sex', 'total_bill', data=tip, notch=True)


plt.show()

16
[184]: sns.boxplot('sex', 'total_bill', data=tip, notch=True,
hue='smoker', showmeans=True)
plt.show()

[187]: sns.boxenplot('sex', 'total_bill', data=tip,hue='smoker')


plt.show()

17
[191]: sns.violinplot('sex', 'total_bill', data=tip,hue='smoker')
plt.show()

[192]: sns.violinplot('sex', 'total_bill', data=tip,hue='smoker', split=True)


plt.show()

[203]: sns.swarmplot('sex', 'total_bill', data=tip,hue='smoker')


plt.show()

18
[205]: sns.stripplot('sex', 'total_bill', data=tip,hue='smoker')
plt.show()

[206]: sns.swarmplot('sex', 'total_bill', data=tip,hue='smoker')


sns.violinplot('sex', 'total_bill', data=tip,hue='smoker', split=True)

plt.show()

19
[199]: sns.kdeplot(tip.total_bill, tip.tip)
sns.rugplot(tip.total_bill, color='g')
sns.rugplot(tip.tip, color='b', vertical=True)

plt.show()

Is there any relationship between tip and total_bill?

20
[208]: plt.scatter('total_bill','tip', data=tip)
plt.xlabel('Total Bill')
plt.ylabel("Tip")
plt.show()

[210]: sns.scatterplot('total_bill', 'tip', data=tip)


plt.show()

21
[213]: print(np.corrcoef(tip.total_bill, tip.tip))

[[1. 0.67573411]
[0.67573411 1. ]]

[214]: mpg = sns.load_dataset('mpg')

[215]: mpg.head()

[215]: mpg cylinders displacement horsepower weight acceleration \


0 18.0 8 307.0 130.0 3504 12.0
1 15.0 8 350.0 165.0 3693 11.5
2 18.0 8 318.0 150.0 3436 11.0
3 16.0 8 304.0 150.0 3433 12.0
4 17.0 8 302.0 140.0 3449 10.5

model_year origin name


0 70 usa chevrolet chevelle malibu
1 70 usa buick skylark 320
2 70 usa plymouth satellite
3 70 usa amc rebel sst
4 70 usa ford torino

[218]: sns.scatterplot('horsepower', 'mpg', data=mpg)


plt.show()

[221]: mpg.shape

22
[221]: (398, 9)

[225]: mpg = mpg.dropna()

[226]: np.corrcoef(mpg.mpg, mpg.horsepower)

[226]: array([[ 1. , -0.77842678],


[-0.77842678, 1. ]])

[229]: sns.scatterplot('displacement', 'horsepower', data=mpg)


plt.show()

[232]: from IPython import display

[234]: display.Image("https://www.oreilly.com/library/view/introduction-to-machine/
,→9781449369880/assets/malp_01in02.png",

height=300, width=500)
[234]:

23
[230]: iris = sns.load_dataset('iris')

[235]: iris.head()

[235]: sepal_length sepal_width petal_length petal_width species


0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

[243]: sns.scatterplot('petal_width', 'petal_length', data=iris)


plt.show()

24
[238]: np.corrcoef(iris.sepal_length, iris.sepal_width)

[238]: array([[ 1. , -0.11756978],


[-0.11756978, 1. ]])

[247]: tip.sex[:5]

[247]: 0 Female
1 Male
2 Male
3 Male
4 Female
Name: sex, dtype: category
Categories (2, object): [Male, Female]

[249]: color = list(map( lambda s: 'cyan' if s == 'Male' else 'pink', tip.sex.values ))


color[:5]

[249]: ['pink', 'cyan', 'cyan', 'cyan', 'pink']

[253]: plt.scatter('total_bill', 'tip', data=tip, color=color)


plt.plot([],[], 'oc',label='Male' )
plt.plot([], [], 'o', color='pink', label='Female')
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.legend()
plt.show()

25
[264]: sns.scatterplot('total_bill', 'tip', hue='sex', data=tip)
plt.show()

[265]: sns.scatterplot('total_bill', 'tip', hue='sex', data=tip, size='smoker')


plt.show()

26
[260]: color = list(map(lambda b: 'red' if b else '#eeeeee', (tip.tip > 5)))

[261]: plt.scatter('total_bill', 'tip', data=tip, color=color)


plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.show()

[262]: mpg.head()

27
[262]: mpg cylinders displacement horsepower weight acceleration \
0 18.0 8 307.0 130.0 3504 12.0
1 15.0 8 350.0 165.0 3693 11.5
2 18.0 8 318.0 150.0 3436 11.0
3 16.0 8 304.0 150.0 3433 12.0
4 17.0 8 302.0 140.0 3449 10.5

model_year origin name


0 70 usa chevrolet chevelle malibu
1 70 usa buick skylark 320
2 70 usa plymouth satellite
3 70 usa amc rebel sst
4 70 usa ford torino

[268]: sns.scatterplot('horsepower', 'mpg', data=mpg, hue='origin')


plt.show()

[282]: color = list(map(lambda b: 'red' if b else '#eeeeee',((mpg.horsepower > 100) &␣


,→(mpg.mpg > 20))))

markdown -> write down the insights found

[283]: plt.scatter('horsepower', 'mpg', data=mpg, color=color)


plt.show()

28
bar, pie, matrix plot
[284]: sns.get_dataset_names()

C:\Anaconda3\lib\site-packages\seaborn\utils.py:376: UserWarning: No parser was


explicitly specified, so I'm using the best available HTML parser for this
system ("lxml"). This usually isn't a problem, but if you run this code on
another system, or in a different virtual environment, it may use a different
parser and behave differently.

The code that caused this warning is on line 376 of the file
C:\Anaconda3\lib\site-packages\seaborn\utils.py. To get rid of this warning,
pass the additional argument 'features="lxml"' to the BeautifulSoup constructor.

gh_list = BeautifulSoup(http)

[284]: ['anscombe',
'attention',
'brain_networks',
'car_crashes',
'diamonds',
'dots',
'exercise',
'flights',
'fmri',
'gammas',
'geyser',
'iris',

29
'mpg',
'penguins',
'planets',
'tips',
'titanic']

[ ]:

30

You might also like