Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 9

ds - Jupyter Notebook

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#freqtable

guests_rating = ['Below Average', 'Above Average', 'Above Average', 'Average', 'Above Ave
'Above Average', 'Average', 'Above Average', 'Below Average', 'Poor', 'Excellent', 'Abo
'Average', 'Above Average', 'Above Average', 'Below Average', 'Poor', 'Above Average', '

c=0
rating_unique = set (guests_rating) print (rating_unique)
freqchk=[]

for i in rating_unique:
c = guests_rating.count(i) freqchk.append(c)
freqchk = list(freqchk) print (freqchk)

samdict = dict(zip(rating_unique,freqchk)) print(samdict)

tab = pd.DataFrame.from_dict(samdict, orient = 'index') tab = tab.reindex(rating_unique)


tab = tab.reset_index()
tab.columns = ['Quality Rating' , 'Frequency'] tab

{'Excellent', 'Poor', 'Above Average', 'Below Average', 'Average'}


[1, 2, 9, 3, 5]
{'Excellent': 1, 'Poor': 2, 'Above Average': 9, 'Below Average': 3, 'Avera
ge': 5}

Out[1]:

Quality Rating Frequency

0 Excellent 1

1 Poor 2

2 Above Average 9

3 Below Average 3
4 Average 5

localhost:8889/notebooks/ds.ipynb 1/9
7/11/23, 7:43 PM ds - Jupyter Notebook

In [2]:

#bargraph

fig = plt.figure(figsize = (10, 5))

plt.bar(tab['Quality Rating'],tab['Frequency'] , color ='green',


width = 0.5)

plt.xlabel("Quality Rating")
plt.ylabel("Frequency")
plt.title("Marada inn quality ratings")
plt.show()

localhost:8889/notebooks/ds.ipynb 2/9
7/11/23, 7:43 PM ds - Jupyter Notebook

In [19]:
#piechart

colours = ["Grey", "green", "pink", "red","orange"]

plt.pie(tab['Frequency'], labels = tab['Quality Rating'], colors = colours, shadow = 'tru


plt.legend()
plt.show()

localhost:8889/notebooks/ds.ipynb 3/9
7/11/23, 7:43 PM ds - Jupyter Notebook

In [4]:
#histogram

data = ['Below Average', 'Above Average', 'Above Average', 'Average', 'Above Average', '
'Above Average', 'Average', 'Above Average', 'Below Average', 'Poor', 'Excellent', 'Abo
'Average', 'Above Average', 'Above Average', 'Below Average', 'Poor', 'Above Average',
seq = [0,1,2,3,4]
fig = plt.figure(figsize = (8, 6)) plt.xlabel("Quality Ratings") plt.ylabel("Frequency")
plt.title("Marada inn quality ratings")

plt.hist(data, bins= seq, ec = "yellow", color = 'green') plt.show()

In [5]:
#relativeand%freq
tab['Relative Frequency'] = tab['Frequency'] / tab['Frequency'].sum() tab['Pecent
Frequency'] = tab['Relative Frequency'] * 100
tab

Out[5]:

Quality Rating Frequency Relative Frequency Pecent Frequency

0 Excellent 1 0.05 5.0

1 Poor 2 0.10 10.0

2 Above Average 9 0.45 45.0

3 Below Average 3 0.15 15.0


4 Average 5 0.25 25.0

localhost:8889/notebooks/ds.ipynb 4/9
7/11/23, 7:43 PM ds - Jupyter Notebook

In [7]:
#scatterplot
no_of_interceptions_x = [1, 3, 2, 1, 3]
no_of_points_scored_y = [14, 24, 18, 17, 30]
plt.figure(figsize=(10, 5))
plt.scatter(no_of_interceptions_x, no_of_points_scored_y, color='grey') plt.xlabel("No of
Interceptions")
plt.ylabel("No of Points Scored")
plt.title('No of Interceptions vs No of Points Scored') plt.show()

localhost:8889/notebooks/ds.ipynb 5/9
7/11/23, 7:43 PM ds - Jupyter Notebook

In [10]:
#stackedbargraph
x = ['Colonial', 'Log', 'Split', 'A-Frame'] data_less_250k = [18, 6, 9, 12]
data_more_250k = [12, 14, 16, 3] plt.bar(x,data_less_250k , color='green')
plt.bar(x,data_more_250k, bottom=data_less_250k, color='pink') plt.show()

In [11]:
Out[11]:
#Crosstabulation

home_style =Colonial Log Split A-Frame


['Colonial', 'Log', 'Split', 'A-Frame']
<$250,000 18 6 19 12
price_less_than_250k = {'Colonial': 18, 'Log': 6, 'Split': 19, 'A-Frame': 12}
>=$250,000 12 14 16 3
price_greater_than_250k = {'Colonial': 12, 'Log': 14, 'Split': 16, 'A-Frame': 3}

cross_tab_df = pd.DataFrame([price_less_than_250k, price_greater_than_250k],


index=['<$250,000', '>=$250,000'])
cross_tab_df

localhost:8889/notebooks/ds.ipynb 6/9
7/11/23, 7:43 PM ds - Jupyter Notebook

In [12]:
#sidebyside bar chart
cross_tab_df.T.plot(kind='bar', figsize=(10, 5))
plt.ylim(0, 20)
plt.yticks(range(0, 21, 2)) plt.xticks(rotation=0) plt.xlabel("House Style")
plt.ylabel("Frequency") plt.title('Finger Lake Homes') plt.show()

localhost:8889/notebooks/ds.ipynb 7/9
7/11/23, 7:43 PM ds - Jupyter Notebook

In [18]:
import numpy as np
import matplotlib.pyplot as plt

d=[91,78,93,57,75,52,99,80,97,62,71,69,72,89,66,75,79,75,72,76,104,74,62,68,97,105,77,65
, 62,82,98,101,79,105,79,69,62,73]

def dotplot(input_x, **args):

# Count how many times does each value occur


unique_values, counts = np.unique(input_x, return_counts=True)

# Convert 1D input into 2D array


scatter_x = [] # x values
scatter_y = [] # corresponding y values
for idx, value in enumerate(unique_values):
for counter in range(1, counts[idx]+1): scatter_x.append(value) scatter_y.append(counter)

# draw dot plot using scatter()


plt.scatter(scatter_x, scatter_y, **args)

# Optional - show all unique values on x-axis. # Matplotlib might hide some of them
plt.gca().set_xticks(unique_values)

plt.figure(figsize=(30, 6), dpi=150) dotplot(input_x=d, marker='*', color='grey', s=100)


plt.xlabel("Parts_Cost", fontsize=15, labelpad=15) plt.ylabel("Frequency", fontsize=14,
labelpad=15) plt.title("Dot Plot For Parts Cost", fontsize=14, pad=15) plt.show()

localhost:8889/notebooks/ds.ipynb 8/9
7/11/23, 7:43 PM ds - Jupyter Notebook

In [15]:
# Creating histogram
fig, ax = plt.subplots(figsize =(10, 7)) ax.hist(d, bins = [50,60,70,80,90,100,110])
plt.xlabel("parts_Cost") plt.ylabel("Frequency") plt.title("Tune-up parts cost")

# Show plot
plt.show()

In [ ]:

localhost:8889/notebooks/ds.ipynb 9/9

You might also like