5. Data Visualization: 0.0.1 Types of Graphs

5.
Data Visualization
June 12, 2020
0.0.1 Type of Graphs

line plot: continuous data distribution; useful for time-series data
histogram: distribution of a single continuous variable
boxplot: distribution of a single continuous variable
matrix plot or heatmap: density distribution of multiple continuous variables
scatter plot: to look for a relationship between two variables
bar plot or count plot: to see proportions of categorical data (distribution of a single categorical variable)
pie chart: same as bar chart
Quantitative
• line
• histogram
• box
• scatter
• matrix plot (heatmap)
Categorical
• bar
• pie
[1]: import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
[11]: plt.rcParams['figure.figsize'] = (10, 5)

plt.rcParams['axes.labelcolor'] = 'green'
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['xtick.color'] = 'red'
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.color'] = 'red'
1
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['figure.dpi'] = 300
0.1 Line Plot

plt.plot() -> line plot
sns.lineplot() -> line plot
[19]: data1 = np.random.normal(140, 10, 100)
data2 = np.random.normal(130, 20, 100)
[21]: plt.plot(data1, 'c', label='group-1') # Time Series

plt.plot(data2, color='gold', label='group-2', alpha=0.5)
plt.show()
[22]: data = np.random.randint(10, 50, 100).reshape(20, 5)
[26]: plt.plot(data)#, colors=['gold', 'cyan', 'silver', 'orange', '#123456'])

plt.show()
2
0.2 Histogram
[27]: tip = sns.load_dataset('tips')
[28]: tip.head()
[28]: total_bill tip sex smoker day time size

0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
4 24.59 3.61 Female No Sun Dinner 4
[34]: plt.plot(tip['total_bill'], 'c')

plt.ylabel('Total Bill')
plt.show()
3
[33]: sns.lineplot(x=range(tip.shape[0]), y='total_bill', data=tip, color='cyan')
plt.show()
[39]: plt.hist(tip['total_bill'], bins=30, color='w', ec='c', histtype='step')

plt.show()
4
[42]: India = np.random.normal(140, 10, 100)
USA = np.random.normal(130, 20, 100)
[51]: plt.hist(India, color='white', ec='c', bins=30, label='India', histtype='step')

plt.hist(USA, color='white', ec='gold', bins=30, label='USA', histtype='step',
alpha=0.5)
plt.ylabel("Height")
plt.legend()
plt.show()
5
[159]: fig, ax = plt.subplots()
sns.distplot(tip['total_bill'], ax=ax, bins=30)
plt.show()

sns.distplot(tip['total_bill'], ax=ax, bins=30, kde=False,
hist_kws={ 'ec':'gold', 'histtype':'step'})
plt.show()
6
sns.distplot(tip['total_bill'], ax=ax, bins=30, kde=False,
hist_kws={ 'ec':'silver', 'color':'gold'}, rug=True,
rug_kws={'color':'cyan'})
plt.show()

sns.distplot(tip['total_bill'], ax=ax, bins=30,
hist_kws={ 'ec':'silver', 'color':'gold'}, rug=True,
rug_kws={'color':'cyan','lw':2},
kde_kws={'color':'r', 'lw':2, 'ls':'--'}
)
plt.show()
7
[59]: plt.boxplot(tip['total_bill'], showmeans=True,
showfliers=False, showcaps=False, showbox=False)
plt.show()

flierprops={'markeredgecolor':'r'})
plt.show()
8
flierprops={'markeredgecolor':'r', 'markerfacecolor': 'g', 'markersize': 10})
plt.show()

flierprops={'markeredgecolor':'r'},
capprops={'color':'gold', 'linewidth': 3})
plt.show()
9
capprops={'color':'gold', 'linewidth': 3},
whiskerprops={'color': 'cyan', 'lw':2})
plt.show()

10
whiskerprops={'color': 'cyan', 'lw':2},
notch=True,
boxprops={'color': 'y'})
plt.show()
[89]: plt.boxplot(tip['total_bill'], showmeans=True,patch_artist=True,

notch=True,
boxprops={'color': 'y'})
plt.show()
11
notch=True,
boxprops={'color': 'y'},
medianprops={'color':'white', 'lw': 3}
)
plt.show()
12
notch=True,
boxprops={'color': 'y'},
medianprops={'color':'white', 'lw': 3, 'ls':'--'},
meanprops={'markeredgecolor': 'white', 'marker': 'D', 'markerfacecolor': 'red',
'markersize':10}
)
plt.show()
[116]: df = pd.DataFrame({ 'India':data1, 'USA':data2})

df.head()
[116]: India USA

0 139.519480 135.887954
1 133.789628 129.095135
2 152.413057 130.037377
3 129.712683 125.216627
4 127.921550 142.761354
[120]: plt.boxplot([df['India'], df['USA']])

plt.xticks([1, 2], ['India', 'USA'])
plt.ylabel('Height')
plt.xlabel('Country')
13
plt.legend()
plt.show()
No handles with labels found to put in legend.

p = ax.boxplot([df['India'], df['USA']], showmeans=True, patch_artist=True,
notch=True)
ax.set_xticks([1, 2])
ax.set_xticklabels(['India', 'USA'])
ax.set_ylabel('Height')
ax.set_xlabel('Country')
p['boxes'][0].set_color('blue')
p['boxes'][1].set_color('red')
ax.plot([], [], 'b-', label='India', lw=10)
ax.plot([], [], 'r-', label='USA', lw=10)
ax.legend(loc=10)
plt.show()
14
[173]: male = tip[tip['sex'] == 'Male']['total_bill']
female = tip[tip['sex']=='Female']['total_bill']
[174]: male.head()
[174]: 1 10.34
2 21.01
3 23.68
5 25.29
6 8.77
Name: total_bill, dtype: float64
[175]: female.head()
[175]: 0 16.99
4 24.59
11 35.26
14 14.83
16 10.33
Name: total_bill, dtype: float64

p = ax.boxplot([male, female], showmeans=True, patch_artist=True, notch=True)
ax.set_xticks([1, 2])
ax.set_xticklabels(['Male', 'Female'])
ax.set_ylabel('Height')
ax.set_xlabel('Country')
p['boxes'][0].set_color('blue')
15
p['boxes'][1].set_color('red')
ax.plot([], [], 'b-', label='Male', lw=10)
ax.plot([], [], 'r-', label='Female', lw=10)
ax.legend(loc=10)
plt.show()
[180]: sns.boxplot('sex', 'total_bill', data=tip, notch=True)

plt.show()
16
[184]: sns.boxplot('sex', 'total_bill', data=tip, notch=True,
hue='smoker', showmeans=True)
plt.show()
[187]: sns.boxenplot('sex', 'total_bill', data=tip,hue='smoker')

plt.show()
17
[191]: sns.violinplot('sex', 'total_bill', data=tip,hue='smoker')
plt.show()
[192]: sns.violinplot('sex', 'total_bill', data=tip,hue='smoker', split=True)

plt.show()
[203]: sns.swarmplot('sex', 'total_bill', data=tip,hue='smoker')

plt.show()
18
[205]: sns.stripplot('sex', 'total_bill', data=tip,hue='smoker')
plt.show()
[206]: sns.swarmplot('sex', 'total_bill', data=tip,hue='smoker')

sns.violinplot('sex', 'total_bill', data=tip,hue='smoker', split=True)
plt.show()
19
[199]: sns.kdeplot(tip.total_bill, tip.tip)
sns.rugplot(tip.total_bill, color='g')
sns.rugplot(tip.tip, color='b', vertical=True)
plt.show()
Is there any relationship between tip and total_bill?
20
[208]: plt.scatter('total_bill','tip', data=tip)
plt.xlabel('Total Bill')
plt.ylabel("Tip")
plt.show()
[210]: sns.scatterplot('total_bill', 'tip', data=tip)

plt.show()
21
[213]: print(np.corrcoef(tip.total_bill, tip.tip))
[[1. 0.67573411]
[0.67573411 1. ]]
[214]: mpg = sns.load_dataset('mpg')
[215]: mpg.head()
[215]: mpg cylinders displacement horsepower weight acceleration \

0 18.0 8 307.0 130.0 3504 12.0
1 15.0 8 350.0 165.0 3693 11.5
2 18.0 8 318.0 150.0 3436 11.0
3 16.0 8 304.0 150.0 3433 12.0
4 17.0 8 302.0 140.0 3449 10.5
model_year origin name

0 70 usa chevrolet chevelle malibu
1 70 usa buick skylark 320
2 70 usa plymouth satellite
3 70 usa amc rebel sst
4 70 usa ford torino
[218]: sns.scatterplot('horsepower', 'mpg', data=mpg)

plt.show()
[221]: mpg.shape
22
[221]: (398, 9)
[225]: mpg = mpg.dropna()
[226]: np.corrcoef(mpg.mpg, mpg.horsepower)
[226]: array([[ 1. , -0.77842678],

[-0.77842678, 1. ]])
[229]: sns.scatterplot('displacement', 'horsepower', data=mpg)

plt.show()
[232]: from IPython import display
[234]: display.Image("https://www.oreilly.com/library/view/introduction-to-machine/9781449369880/assets/malp_01in02.png",
height=300, width=500)
[234]:
23
[230]: iris = sns.load_dataset('iris')
[235]: iris.head()
[235]: sepal_length sepal_width petal_length petal_width species

0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
[243]: sns.scatterplot('petal_width', 'petal_length', data=iris)

plt.show()
24
[238]: np.corrcoef(iris.sepal_length, iris.sepal_width)
[238]: array([[ 1. , -0.11756978],

[-0.11756978, 1. ]])
[247]: tip.sex[:5]
[247]: 0 Female
1 Male
2 Male
3 Male
4 Female
Name: sex, dtype: category
Categories (2, object): [Male, Female]
[249]: color = list(map( lambda s: 'cyan' if s == 'Male' else 'pink', tip.sex.values ))

color[:5]
[249]: ['pink', 'cyan', 'cyan', 'cyan', 'pink']
[253]: plt.scatter('total_bill', 'tip', data=tip, color=color)

plt.plot([],[], 'oc',label='Male' )
plt.plot([], [], 'o', color='pink', label='Female')
plt.ylabel('Tip')
plt.legend()
plt.show()
25
[264]: sns.scatterplot('total_bill', 'tip', hue='sex', data=tip)
plt.show()
[265]: sns.scatterplot('total_bill', 'tip', hue='sex', data=tip, size='smoker')

plt.show()
26
[260]: color = list(map(lambda b: 'red' if b else '#eeeeee', (tip.tip > 5)))
[261]: plt.scatter('total_bill', 'tip', data=tip, color=color)

plt.ylabel('Tip')
plt.show()
[262]: mpg.head()
27
[262]: mpg cylinders displacement horsepower weight acceleration \
0 18.0 8 307.0 130.0 3504 12.0
1 15.0 8 350.0 165.0 3693 11.5
2 18.0 8 318.0 150.0 3436 11.0
3 16.0 8 304.0 150.0 3433 12.0
4 17.0 8 302.0 140.0 3449 10.5
model_year origin name

0 70 usa chevrolet chevelle malibu
1 70 usa buick skylark 320
2 70 usa plymouth satellite
3 70 usa amc rebel sst
4 70 usa ford torino
[268]: sns.scatterplot('horsepower', 'mpg', data=mpg, hue='origin')

plt.show()
[282]: color = list(map(lambda b: 'red' if b else '#eeeeee',((mpg.horsepower > 100) &
(mpg.mpg > 20))))
Markdown -> note down the insights found
[283]: plt.scatter('horsepower', 'mpg', data=mpg, color=color)

plt.show()
28
bar, pie, matplot
[284]: sns.get_dataset_names()
C:\Anaconda3\lib\site-packages\seaborn\utils.py:376: UserWarning: No parser was

explicitly specified, so I'm using the best available HTML parser for this
system ("lxml"). This usually isn't a problem, but if you run this code on
another system, or in a different virtual environment, it may use a different
parser and behave differently.
The code that caused this warning is on line 376 of the file
C:\Anaconda3\lib\site-packages\seaborn\utils.py. To get rid of this warning,
pass the additional argument 'features="lxml"' to the BeautifulSoup constructor.
gh_list = BeautifulSoup(http)
[284]: ['anscombe',
'attention',
'brain_networks',
'car_crashes',
'diamonds',
'dots',
'exercise',
'flights',
'fmri',
'gammas',
'geyser',
'iris',
29
'mpg',
'penguins',
'planets',
'tips',
'titanic']
[ ]:
30

5.datavisulation: 0.0.1 Type of Graphs

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

5.datavisulation: 0.0.1 Type of Graphs

Uploaded by

Copyright:

Available Formats

5.

June 12, 2020

0.0.1 Type of Graphs

[11]: plt.rcParams['figure.figsize'] = (10, 5)

0.1 Line Plot

[21]: plt.plot(data1, 'c', label='group-1') # Time Series

[22]: data = np.random.randint(10, 50, 100).reshape(20, 5)

[26]: plt.plot(data)#, colors=['gold', 'cyan', 'silver', 'orange', '#123456'])

[28]: total_bill tip sex smoker day time size

[34]: plt.plot(tip['total_bill'], 'c')

[39]: plt.hist(tip['total_bill'], bins=30, color='w', ec='c', histtype='step')

[51]: plt.hist(India, color='white', ec='c', bins=30, label='India', histtype='step')

[163]: fig, ax = plt.subplots()

[169]: fig, ax = plt.subplots()

[62]: plt.boxplot(tip['total_bill'], showmeans=True,

[74]: plt.boxplot(tip['total_bill'], showmeans=True,

[83]: plt.boxplot(tip['total_bill'], showmeans=True,

[89]: plt.boxplot(tip['total_bill'], showmeans=True,patch_artist=True,

[116]: df = pd.DataFrame({ 'India':data1, 'USA':data2})

[116]: India USA

[120]: plt.boxplot([df['India'], df['USA']])

No handles with labels found to put in legend.

[156]: fig, ax = plt.subplots()

[178]: fig, ax = plt.subplots()

[180]: sns.boxplot('sex', 'total_bill', data=tip, notch=True)

[187]: sns.boxenplot('sex', 'total_bill', data=tip,hue='smoker')

[192]: sns.violinplot('sex', 'total_bill', data=tip,hue='smoker', split=True)

[203]: sns.swarmplot('sex', 'total_bill', data=tip,hue='smoker')

[206]: sns.swarmplot('sex', 'total_bill', data=tip,hue='smoker')

Is there any relation between tip and total_bill

[210]: sns.scatterplot('total_bill', 'tip', data=tip)

[214]: mpg = sns.load_dataset('mpg')

[215]: mpg cylinders displacement horsepower weight acceleration \

model_year origin name

[218]: sns.scatterplot('horsepower', 'mpg', data=mpg)

[225]: mpg = mpg.dropna()

[226]: np.corrcoef(mpg.mpg, mpg.horsepower)

[226]: array([[ 1. , -0.77842678],

[229]: sns.scatterplot('displacement', 'horsepower', data=mpg)

[232]: from IPython import display

[235]: sepal_length sepal_width petal_length petal_width species

[243]: sns.scatterplot('petal_width', 'petal_length', data=iris)

[238]: array([[ 1. , -0.11756978],

[249]: color = list(map( lambda s: 'cyan' if s == 'Male' else 'pink', tip.sex.values ))

[249]: ['pink', 'cyan', 'cyan', 'cyan', 'pink']

[253]: plt.scatter('total_bill', 'tip', data=tip, color=color)

[265]: sns.scatterplot('total_bill', 'tip', hue='sex', data=tip, size='smoker')

[261]: plt.scatter('total_bill', 'tip', data=tip, color=color)

model_year origin name

[268]: sns.scatterplot('horsepower', 'mpg', data=mpg, hue='origin')

[282]: color = list(map(lambda b: 'red' if b else '#eeeeee',((mpg.horsepower > 100) &␣

markdown -> insights findout

[283]: plt.scatter('horsepower', 'mpg', data=mpg, color=color)

C:\Anaconda3\lib\site-packages\seaborn\utils.py:376: UserWarning: No parser was

You might also like