Download as doc, pdf, or txt
Download as doc, pdf, or txt
You are on page 1of 48

EX.

NO:02(a) WORKING WITH NUMPY ARRAYS


(BASIC NUMPY OPERATIONS)

PROGRAM:

(i)Creation of different types of Numpy arrays and displaying basic information.

#Importing numpy
import numpy as np

# Define and print a 1-D array.
my1DArray=np.array([1,8,27,64])
print(my1DArray)

# Define and print a 2-D array (3 rows x 4 columns).
my2DArray=np.array([[1,2,3,4],[2,4,9,16],[4,8,18,32]])
print(my2DArray)

# Define and print a 3-D array (2 blocks x 2 rows x 4 columns).
my3DArray=np.array([[[1,2,3,4],[5,6,7,8]],[[1,2,3,4],[9,10,11,12]]])
print(my3DArray)

# Print the buffer object backing the array; its repr shows a memory address.
print(my2DArray.data)

# Print the shape of the array.
print(my2DArray.shape)

# Print the data type of the array.
# A bare expression is only echoed in an interactive session, so print it
# explicitly to get output when this runs as a script.
print(my2DArray.dtype)

# Print the strides of the array (bytes to step along each axis).
print(my2DArray.strides)

(ii) Creation of an array using built-in NumPy functions

# Array of ones (float64 by default)
ones=np.ones((3,4))
print(ones)

# Array of zeros with an explicit integer dtype
# (was `np,zeros`, which builds a tuple and then fails on the bare name `zeros`)
zeros=np.zeros((2,3,4),dtype=np.int16)
print(zeros)

# Array with random values drawn uniformly from [0, 1)
randomArray=np.random.random((2,2))
print(randomArray)

# Empty array: contents are whatever was already in memory, not guaranteed zeros
emptyArray=np.empty((3,2))
print(emptyArray)

# Full array: every element set to the given fill value
fullArray=np.full((2,2),7)
print(fullArray)

# Array of evenly-spaced values, given a step (stop value is excluded)
evenSpacedArray=np.arange(10,25,5)
print(evenSpacedArray)

# Array of evenly-spaced values, given the number of samples (stop value is included)
evenSpacedArray2=np.linspace(0,2,9)
print(evenSpacedArray2)

(iii) Performing file operations with NumPy arrays

import numpy as np

# Initialize an array
arr=np.array([[[11,11,9,9],[11,0,2,0]],[[10,14,9,14],[0,1,11,11]]])

# Save the array to a binary file.
# The with-block closes the file (flushing the data to disk) even if np.save
# raises; the earlier `file.close` without parentheses never closed it.
with open("arr","wb") as file:
    np.save(file,arr)

# Re-open the file in binary read mode and load it back into a NumPy array
with open("arr","rb") as file:
    arr1=np.load(file)

# Print the array that was read back
print(arr1)

Output:

(i)Creation of different types of Numpy arrays and displaying basic information.

O/P:

[1,8,27,64]

# 1D ARRAY

O/P:

[ 1  8 27 64]

# 2D ARRAY

O/P:

[[ 1 2 3 4]

[ 2 4 9 16]

[ 4 8 18 32]]

#3D ARRAY

O/P:

[[[ 1 2 3 4]

[ 5 6 7 8]]

[[ 1 2 3 4]

[ 9 10 11 12]]]
#OutMemory Address

O/P:

<memory at 0x000001E023196BA0>

#THE SHAPE OF ARRAY

O/P:

(3, 4)

#OUT THE DATA TYPE OF THE ARRAY

O/P:

dtype('int32')

#THE STRIDES OF THE ARRAY

O/P:

(16, 4)

II)CREATION OF AN ARRAY USING BUILT-IN NUMPY FUNCTIONS

#ARRAY OF ONES

O/P:

[[1. 1. 1. 1.]

[1. 1. 1. 1.]

[1. 1. 1. 1.]]

#ARRAY OF ZEROS

O/P:

[[[0 0 0 0]

[0 0 0 0]

[0 0 0 0]]
[[0 0 0 0]

[0 0 0 0]

[0 0 0 0]]]

#ARRAY WITH RANDOM VALUES

O/P:

array([[0.11569802, 0.68270419],

[0.16448154, 0.60236614]])

#EMPTY ARRAY

O/P:

[[0. 0.]

[0. 0.]

[0. 0.]]

#FULL ARRAY

O/P:

[[7 7]

[7 7]]

#ARRAY OF EVENLY-SPACED VALUES

O/P:

[10 15 20]

#ARRAY OF EVENLY-SPACED VALUES

O/P:

[0. 0.25 0.5 0.75 1. 1.25 1.5 1.75 2. ]


III)PERFORMING FILE OPERATION WITH NUMPY ARRAYS

#INITIALIZE AN ARRAY

arr=np.array([[[11,11,9,9],[11,0,2,0]],[[10,14,9,14],[0,1,11,11]]])

#OPEN A BINARY FILE IN WRITE MODE

file=open("arr","wb")

#SAVE ARRAY TO A FILE

np.save(file,arr)

#CLOSE THE FILE

O/P:

<function BufferedWriter.close>

#OPEN THE FILE IN READ BINARY MODE

file=open("arr","rb")

#READ THE FILE TO NUMPY ARRAY

arr1=np.load(file)

#PRINT THE ARRAY

O/P:

[[[11 11 9 9]

[11 0 2 0]]

[[10 14 9 14]

[ 0 1 11 11]]]
EX. NO:02(b) BASIC ARITHMETIC OPERATIONS WITH NUMPY ARRAYS

PROGRAM:

import numpy as np
# 3x3 float array holding 0..8. np.float64 is used because the np.float_
# alias was removed in NumPy 2.0.
a=np.arange(9,dtype=np.float64).reshape(3,3)
print('First array:')
print(a)
print('\n')

# 1-D array of length 3; it is broadcast across each row of `a` below.
print('Second array:')
b=np.array([10,10,10])
print(b)
print('\n')

print('Add the two arrays:')
print(np.add(a,b))
print('\n')

print('Subtract the two arrays:')
print(np.subtract(a,b))
print('\n')

print('Multiply the two arrays:')
print(np.multiply(a,b))
print('\n')

print('Divide the two arrays:')
print(np.divide(a,b))
print('\n')

OUTPUT:

First array:
[[0. 1. 2.]
[3. 4. 5.]
[6. 7. 8.]]

Second array:
[10 10 10]
Add the two arrays:
[[10. 11. 12.]
[13. 14. 15.]
[16. 17. 18.]]

Subtract the two arrays:


[[-10. -9. -8.]
[-7. -6. -5.]
[-4. -3. -2.]]

Multiply the two arrays:


[[0. 10. 20.]
[30. 40. 50.]
[60. 70. 80.]]

Divide the two arrays:


[[0. 0.1 0.2]
[0.3 0.4 0.5]
[0.6 0.7 0.8]]
EX.NO:03 WORKING WITH PANDAS DATAFRAMES

PROGRAM:

(i)Creation of a dataframe from a series:

import numpy as np
import pandas as pd
# The installed version is exposed as pd.__version__ (the earlier name
# `pd_version_` is undefined and raised NameError).
print("Pandas Version:",pd.__version__)

# Show up to 500 columns/rows before pandas truncates the printed frame.
pd.set_option('display.max_columns',500)
pd.set_option('display.max_rows',500)

series=pd.Series([2,3,7,11,13,17,19,23])
print(series)

# Build a 4-row frame from mixed inputs; scalar values ('B', 'F', 'G') are
# repeated for every row.
series_df=pd.DataFrame({
    'A':range(1,5),
    'B':pd.Timestamp('20190526'),
    'C':pd.Series(5,index=list(range(4)),dtype='float64'),
    'D':np.array([3]*4,dtype='int64'),
    'E':pd.Categorical(["Depression","Social Anxiety","Bipolar Disorder","Eating Disorder"]),
    'F':'Mental health',
    'G':'Is Challenging'
})
print(series_df)

(ii)Creation of a dataframe from Dictionary:

import numpy as np
import pandas as pd
# Each dictionary becomes one row; a key missing from a row (here 'C' in the
# first one) shows up as NaN in that column.
rows = [
    {'A': 'Apple', 'B': 'Ball'},
    {'A': 'Aeroplane', 'B': 'Bat', 'C': 'Cat'},
]
dict_df = pd.DataFrame(rows)
print(dict_df)

(iii)Creation of a dataframe from N-Dimensional Arrays:

import numpy as np
import pandas as pd
# Column-oriented data: every key is a column name, every list one column.
# "Sarpsborg" and "City of Oslo" are spelled as in the sample output
# (the listing previously had "Sarposborg" and "City Of Oslo").
sdf={'Country':['Ostfold','Hordaland','Oslo','Hedmark','Oppland','Buskerud'],
     'ISO-Code':[1,2,3,4,5,6],
     'Area':[4180.69,4917.94,454.07,27397.76,25192.10,14910.94],
     'Administrative Centre':["Sarpsborg","Oslo","City of Oslo", "Hamar", "Lillehammer",
                              "Drammen"]}
sdf=pd.DataFrame(sdf)
print(sdf)
(iv)Loading a dataset from an external source into a pandas dataframe

import numpy as np
import pandas as pd
# Column names for the UCI "adult" data file, which has no header row.
# Plain ASCII quotes are required; the typographic quotes in the earlier
# listing are a syntax error.
columns=['age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status',
         'occupation', 'relationship', 'ethnicity', 'gender', 'capital_gain', 'capital_loss',
         'hours_per_week', 'country_of_origin', 'income']
# The URL must be a single unbroken string literal.
df=pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',
               names=columns)
df.head(10)

OUTPUT:

(i)Creation of a dataframe from a series:

Pandas Version:1.3.4

0 2
1 3
2 7
3 11
4 13
5 17
6 19
7 23

Dtype:int64

A B C D E F G
0 1 25-11-22 5.0 3 Depression Mental health is challenging
1 2 25-11-22 5.0 3 Social Anxiety Mental health is challenging
2 3 25-11-22 5.0 3 Bipolar Disorder Mental health is challenging
3 4 25-11-22 5.0 3 Eating Disorder Mental health is challenging

(ii)Creation of a dataframe from Dictionary:

A B C
0 Apple Ball NaN
1 Aeroplane Bat Cat
(iii)Creation of a dataframe from N-Dimensional Array:

S.No Country ISO-Code Area Administrative centre


0 Ostfold 1 4180.69 Sarpsborg
1 Hordaland 2 4917.94 Oslo
2 Oslo 3 454.07 City of Oslo
3 Hedmark 4 27397.76 Hamar
4 Oppland 5 25192.10 Lillehammer
5 Buskerud 6 14910.94 Drammen
EX.NO:04(a) READING DATA FROM TEXT FILES AND EXPLORING
VARIOUS COMMANDS FOR DOING DESCRIPTIVE
ANALYTICS ON THE IRIS DATA SET

PROGRAM:

# Read Text Files with Pandas using read_csv()


# importing pandas
import pandas as pd
# read text file into pandas DataFrame
df = pd.read_csv("gfg.txt", sep=" ")
# display DataFrame
print(df)

OUTPUT:
EX NO: 4(b) READING DATA FROM EXCEL AND EXPLORING VARIOUS

COMMANDS FOR DOING DESCRIPTIVE ANALYTICS ON


THE IRIS DATA SET

PROGRAM:

import pandas as pd

df = pd.read_excel('sample.xlsx')

print(df)

Load all sheets

Case 1: If the sheet_name argument is None, all sheets are read.

Syntax

df_sheet_all = pd.read_excel('sample.xlsx', sheet_name=None)

print(df_sheet_all)

Case2:In this case, the sheet name becomes the key.

Syntax

print(df_sheet_all['sheet1'])

# A B C

# one 11 12 13

# two 21 22 23

# three 31 32 33

print(type(df_sheet_all['sheet1']))

# <class 'pandas.core.frame.DataFrame'>

print(df_sheet_all['sheet2'])

print(type(df_sheet_all['sheet2']))

# <class 'pandas.core.frame.DataFrame'>
OUTPUT:

Unnamed: 0 A B C

0 one 11 12 13

1 two 21 22 23

2 three 31 32 33

# AA BB CC

# ONE 11 12 13

# TWO 21 22 23

# THREE 31 32 33
EX. NO:04(c) READING DATA FROM WEB AND EXPLORING VARIOUS
COMMANDS FOR DOING DESCRIPTIVE ANALYTICS ON
THE IRIS DATA SET

PROGRAM:

import pandas as pd
# Reading the CSV file
df = pd.read_csv("Iris.csv")

# Printing top 5 rows


df.head()
df.shape
df.info()
df.describe()
df.isnull().sum()
data = df.drop_duplicates(subset ="Species",)
data
df.value_counts("Species")
# importing packages
import seaborn as sns
import matplotlib.pyplot as plt
sns.countplot(x='Species', data=df, )
plt.show()

# importing packages
import seaborn as sns
import matplotlib.pyplot as plt
sns.scatterplot(x='SepalLengthCm', y='SepalWidthCm',hue='Species', data=df, )

# Placing Legend outside the Figure


plt.legend(bbox_to_anchor=(1, 1), loc=2)
plt.show()

# importing packages
import seaborn as sns
import matplotlib.pyplot as plt
sns.scatterplot(x='PetalLengthCm', y='PetalWidthCm',hue='Species', data=df, )

# Placing Legend outside the Figure


plt.legend(bbox_to_anchor=(1, 1), loc=2)
plt.show()
# importing packages
import seaborn as sns
import matplotlib.pyplot as plt
sns.pairplot(df.drop(['Id'], axis = 1),hue='Species', height=2)

# importing packages
import seaborn as sns
import matplotlib.pyplot as plt
fig, axes = plt.subplots(2, 2, figsize=(10,10))
axes[0,0].set_title("Sepal Length")
axes[0,0].hist(df['SepalLengthCm'], bins=7)
axes[0,1].set_title("Sepal Width")
axes[0,1].hist(df['SepalWidthCm'], bins=5);
axes[1,0].set_title("Petal Length")
axes[1,0].hist(df['PetalLengthCm'], bins=6);
axes[1,1].set_title("Petal Width")
axes[1,1].hist(df['PetalWidthCm'], bins=6);

# importing packages
import seaborn as sns
import matplotlib.pyplot as plt
plot = sns.FacetGrid(df, hue="Species")
plot.map(sns.distplot, "SepalLengthCm").add_legend()
plot = sns.FacetGrid(df, hue="Species")
plot.map(sns.distplot, "SepalWidthCm").add_legend()
plot = sns.FacetGrid(df, hue="Species")
plot.map(sns.distplot, "PetalLengthCm").add_legend()
plot = sns.FacetGrid(df, hue="Species")
plot.map(sns.distplot, "PetalWidthCm").add_legend()
plt.show()
# Pearson correlation of the numeric columns.
# numeric_only=True skips the string 'Species' column; since pandas 2.0
# corr() no longer drops non-numeric columns silently and raises instead.
data.corr(method='pearson', numeric_only=True)

# importing packages
import seaborn as sns
import matplotlib.pyplot as plt

# Correlation heatmap of the measurements; the 'Id' row and column are
# removed because the record id carries no information.
corr = df.corr(method='pearson', numeric_only=True)
sns.heatmap(corr.drop(['Id'], axis=1).drop(['Id'], axis=0), annot=True)
plt.show()

# importing packages
import seaborn as sns
import matplotlib.pyplot as plt
def graph(y):
    """Draw a box plot of column *y* of the module-level ``df``, grouped by Species.

    The plot goes onto the currently active matplotlib axes, so the caller
    selects the subplot before calling.
    """
    sns.boxplot(x="Species", y=y, data=df)


# A single figure holding all four box plots; it must be created once,
# outside graph(), or every call would open a new figure.
plt.figure(figsize=(10,10))

# Adding the subplot at the specified
# grid position
plt.subplot(221)
graph('SepalLengthCm')

plt.subplot(222)
graph('SepalWidthCm')

plt.subplot(223)
graph('PetalLengthCm')

plt.subplot(224)
graph('PetalWidthCm')
plt.show()

# importing packages
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset


df = pd.read_csv('Iris.csv')

sns.boxplot(x='SepalWidthCm', data=df)

# Importing
# numpy is needed for np.percentile / np.where below and was missing.
# `from sklearn.datasets import load_boston` was dropped: it was unused and
# the function was removed in scikit-learn 1.2, so the import itself failed.
import numpy as np
import sklearn
import pandas as pd
import seaborn as sns

# Load the dataset
df = pd.read_csv('Iris.csv')

# IQR
# `method=` replaces the deprecated `interpolation=` keyword (NumPy >= 1.22).
Q1 = np.percentile(df['SepalWidthCm'], 25, method='midpoint')
Q3 = np.percentile(df['SepalWidthCm'], 75, method='midpoint')
IQR = Q3 - Q1

print("Old Shape: ", df.shape)

# Upper bound: positions of rows at or above Q3 + 1.5*IQR
upper = np.where(df['SepalWidthCm'] >= (Q3+1.5*IQR))

# Lower bound: positions of rows at or below Q1 - 1.5*IQR
lower = np.where(df['SepalWidthCm'] <= (Q1-1.5*IQR))

# Removing the Outliers
# np.where returns positions, while drop() expects index labels. Translate
# positions to labels once, before any row is removed, so the result does
# not depend on the frame having a default 0..n-1 index.
outlier_labels = df.index[np.concatenate([upper[0], lower[0]])]
df.drop(outlier_labels, inplace = True)

print("New Shape: ", df.shape)

sns.boxplot(x='SepalWidthCm', data=df)

OUTPUT:
EX. NO:05(a) Finding Frequency, Mean, Median, Mode,Variance, Standard
deviation,Skewness and Kurtosis from uci dataset using Univariate
Analysis Method
PROGRAM:

import numpy as np
import scipy as sp
from scipy import stats
import matplotlib.pyplot as plt

## generate the data and plot it for an ideal normal curve

## x-axis for the plot


## x-axis for the plot
x_data = np.arange(-5, 5, 0.001)

## y-axis as the gaussian: standard normal pdf (mean 0, sd 1) evaluated at
## x_data (the earlier name `x_axis` was never defined)
y_data = stats.norm.pdf(x_data, 0, 1)

## plot data
plt.plot(x_data, y_data)
plt.show()

## setting the seed for the random generation
np.random.seed(1)

## generating univariate data: 1000 draws scaled by 10 and shifted by 100
data = 10 * np.random.randn(1000) + 100

## plotting the data with the default number of bins
plt.hist(data)
plt.show()

## plotting the same data with 100 bins
plt.hist(data, bins=100)
plt.show()

# NOTE(review): `df` is never created in this listing; it is assumed to be a
# DataFrame with a numeric 'Age' column loaded beforehand - confirm the source.
age = df['Age']

# Mean
age.mean()
## output: 68.57490118577076

# Median
age.median()
## output: 77.5

df.describe()

## trim = 0.1 drops 10% from each end
stats.trim_mean(age, 0.1)
## output: 71.19605911330049

# Variance
age.var()

# Standard deviation
age.std()
## output: 28.148861406903617

# Mean absolute deviation around the mean.
# Series.mad() was removed in pandas 2.0; this expression computes the same
# quantity. (Despite the earlier label, mad() was the *mean* absolute
# deviation, not the median absolute deviation.)
(age - age.mean()).abs().mean()
## output: 24.610885188020433

# Range
# The earlier form indexed with `idxmax` without calling it, which raises;
# max() - min() gives the range directly.
age.max() - age.min()
## output: 97.1

# Interquartile range
Q1 = age.quantile(0.25)
Q3 = age.quantile(0.75)
IQR = Q3 - Q1
## Output: 49.04999999999999

Output:
EX. NO:05(b) Finding Frequency, Mean, Median, Mode, Variance,
Standard deviation, Skewness and Kurtosis from Pima Indians
Diabetes data set using Univariate Analysis Method
Preparing the DataSet:

library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.3
## corrplot 0.84 loaded
library(caret)
## Warning: package 'caret' was built under R version 3.4.3
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.3

# Read the Pima Indians Diabetes data.
# The URL and the column names must be unbroken string literals; the line
# breaks in the earlier listing put a newline inside the URL and inside "BMI".
# header = FALSE is passed because read.csv defaults to header = TRUE, which
# would consume the first line as a header.
# NOTE(review): this assumes the data file has no header row - confirm.
pima <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data",
  header = FALSE,
  col.names = c("Pregnant", "Plasma_Glucose", "Dias_BP", "Triceps_Skin",
                "Serum_Insulin", "BMI", "DPF", "Age", "Diabetes"))
head(pima)  # visualize the header of Pima data
str(pima)   # show the structure of the data
sapply(pima, function(x) sum(is.na(x)))  # count missing values per column
pairs(pima, panel = panel.smooth)        # scatter-plot matrix with smoothers
# correlation matrix of all columns except the 9th (Diabetes)
corrplot(cor(pima[, -9]), type = "lower", method = "number")

Output:
EX. NO:05(c) Finding Frequency, Mean, Median, Mode,Variance, Standard
deviation,Skewness and Kurtosis from uci dataset and Pima Indians
Diabetes using Bivariate Analysis Method

PROGRAM:

import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style('darkgrid')
df = pd.read_csv('../input/pima-indians-diabetes-database/diabetes.csv')
df.head()

# Replace the 0/1 outcome codes with readable labels.
df.Outcome = df.Outcome.replace({0:'Non-Diab',1:'Diab'})

# Shorten the long column name. rename(..., inplace=True) returns None, so
# its result must not be assigned back (the earlier code stored that None on
# a `DiabetesPedigreeFunction` attribute of the frame).
df.rename(columns={'DiabetesPedigreeFunction':'DPF'}, inplace=True)
df.head()
df.dtypes
df.shape
df.info()
df.describe().T

plt.figure(dpi=120)
sns.pairplot(df)
plt.show()

plt.figure(dpi = 120)
sns.pairplot(df,hue = 'Outcome',palette = 'plasma')
plt.legend(['Non Diabetic','Diabetic'])
plt.show()

#correlation
# 'Outcome' now holds strings; numeric_only=True keeps corr() from failing on
# it under pandas >= 2.0. The matrix is computed once and reused.
corr = df.corr(numeric_only=True)
plt.figure(dpi = 120,figsize= (5,4))
# Mask the upper triangle so each pair of variables is shown once.
mask = np.triu(np.ones_like(corr,dtype = bool))
sns.heatmap(corr,mask = mask, fmt = ".2f",annot=True,lw=1,cmap = 'plasma')
plt.yticks(rotation = 0)
plt.xticks(rotation = 90)
plt.title('Correlation Heatmap')
plt.show()
#Jointplots
def _plot_joint_with(feature, others, color, corr):
    """Print the correlation of *feature* with each column in *others* and
    draw a regression joint plot of *feature* against each of them.

    *corr* is the precomputed correlation matrix of the module-level ``df``.
    """
    plt.figure(dpi = 100, figsize = (5,4))
    print(f"Joint plot of {feature} with Other Variables ==> \n")
    for other in others:
        print(f"Correlation between {feature} and {other} ==> ", corr.loc[feature, other])
        # 'reg' is seaborn's name for the regression joint plot;
        # 'regression' is not an accepted `kind`.
        sns.jointplot(x=feature, y=other, data=df, kind='reg', color=color)
        plt.show()


# Compute the correlation matrix once instead of on every loop iteration.
# numeric_only=True skips the string-valued 'Outcome' column.
corr = df.corr(numeric_only=True)
col = list(df.columns)

# Glucose is paired with every other column except the 'Outcome' label.
_plot_joint_with('Glucose', [c for c in col if c not in ('Glucose', 'Outcome')],
                 'purple', corr)
#Glucose shows positive weak linear association with other variable in given dataset

# Each remaining feature is paired only with the columns that come after it
# and before the last column, so no pair is plotted twice.
for feature, color in [('BloodPressure', 'green'), ('SkinThickness', 'blue'),
                       ('Insulin', 'green'), ('DPF', 'red')]:
    idx = col.index(feature)
    _plot_joint_with(feature, col[idx+1:len(col)-1], color, corr)

#BloodPressure shows positive weak linear association with other variable in given
#dataset.
# NOTE(review): the comment that followed the Insulin plots referred to BMI;
# confirm whether a BMI section is missing from this listing.
# DPF shows positive weak linear association with other variable in given dataset.

# Features are every column but the last; the last column is the label.
x= df.iloc[:,:-1].values
y= df.iloc[:,-1].values

from sklearn.decomposition import PCA

# Project the features onto their first two principal components.
pca = PCA(n_components=2)
pca.fit(x)

x_new = pca.transform(x)

# Plot the PCA projection. The earlier code took columns of the raw matrix
# `x` here, so the axes labelled "PCA Feature" showed the first two original
# columns instead of the principal components.
xs = x_new[:,0]
ys = x_new[:,1]

plt.figure(dpi=100)
sns.scatterplot(x=xs,y=ys,hue=y).set_title('Dependency of Data with Outcome')
plt.xlabel('PCA Feature 1')
plt.ylabel('PCA Feature 2')
plt.show()
SAMPLE OUTPUT:

S.no Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
2 8 183 64 0 0 23.3 0.672 32 1
3 1 89 66 23 94 28.1 0.167 21 0
4 0 137 40 35 168 43.1 2.288 33 1

S.no Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 Diab
1 1 85 66 29 0 26.6 0.351 31 Non-Diab
2 8 183 64 0 0 23.3 0.672 32 Diab
3 1 89 66 23 94 28.1 0.167 21 Non-Diab
4 0 137 40 35 168 43.1 2.288 33 Diab
EX. NO:06(a) APPLY AND EXPLORE VARIOUS PLOTTING FUNCTIONS
ON UCI DATA SETS USING NORMAL CURVES
PROGRAM:
# importing the required libraries
from sklearn import datasets
import pandas as pd
import seaborn as sns
# Setting up the Data Frame
iris = datasets.load_iris()
# The closing parenthesis was missing; 'Petal_Length' was misspelled.
iris_df = pd.DataFrame(iris.data, columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length',
                                           'Petal_Width'])
# Map the integer class codes to species names in one assignment. Calling
# replace(..., inplace=True) on the column selection is chained assignment
# and is not guaranteed to modify iris_df.
iris_df['Target'] = pd.Series(iris.target).map(
    {0: 'Iris_Setosa', 1: 'Iris_Versicolor', 2: 'Iris_Virginica'})
# Plotting the KDE Plot
# `label` must be lower-case (matplotlib rejects `Label`), and `fill`
# replaces the deprecated `shade` keyword.
sns.kdeplot(iris_df.loc[(iris_df['Target'] =='Iris_Virginica'),
            'Sepal_Length'], color = 'b', fill = True, label ='Iris_Virginica')

SAMPLE OUTPUT:
EX. NO:06(b) APPLY AND EXPLORE VARIOUS PLOTTING FUNCTIONS
ON UCI DATA SETS USING DENSITY AND CONTOUR PLOTS
PROGRAM:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import numpy as np
def f(x, y):
    """Return sin(x)**10 + cos(10 + y*x) * cos(x), evaluated element-wise."""
    return np.sin(x) ** 10 + np.cos(10 + y * x) * np.cos(x)
# Sample grid: 50 points along x and 40 along y (np.nspace does not exist).
x = np.linspace(0, 5, 50)
y = np.linspace(0, 5, 40)
X, Y = np.meshgrid(x, y)
Z = f(X, Y)

# Each plot gets its own figure so later plots do not draw over earlier ones.

# Line-only contour plot in a single colour.
plt.figure()
plt.contour(X, Y, Z, colors='black');

# 20 contour lines coloured with the RdGy colormap.
plt.figure()
plt.contour(X, Y, Z, 20, cmap='RdGy');

# Filled contours with a colour bar.
plt.figure()
plt.contourf(X, Y, Z, 20, cmap='RdGy')
plt.colorbar();

# The same data rendered as an image.
plt.figure()
plt.imshow(Z, extent=[0, 5, 0, 5], origin='lower',cmap='RdGy')
plt.colorbar()
# 'image' is passed positionally; plt.axis() has no `aspect` keyword.
plt.axis('image');

# Labelled contour lines over a semi-transparent image.
plt.figure()
contours = plt.contour(X, Y, Z, 3, colors='black')
plt.clabel(contours, inline=True, fontsize=8)
plt.imshow(Z, extent=[0, 5, 0, 5], origin='lower', cmap='RdGy', alpha=0.5)
plt.colorbar();
SAMPLE OUTPUT:
EX. NO:06(c) APPLY AND EXPLORE VARIOUS PLOTTING FUNCTIONS
ON UCI DATA SETS USING CORRELATION AND SCATTER
PLOTS
PROGRAM:
import pandas as pd
# All string literals use plain ASCII quotes; the typographic quotes in the
# earlier listing are a syntax error.
con=pd.read_csv('Data/ConcreteStrength.csv')
con
list(con.columns)

# Rename the columns to identifier-friendly names.
# NOTE(review): the source names for 'CoarseAgg' and 'FineAgg' were lost in
# the original listing; 'Coarse Aggr.' and 'Fine Aggr.' are assumed - confirm
# against the output of list(con.columns) above.
con.rename(columns={'Fly ash': 'FlyAsh',
                    'Coarse Aggr.': 'CoarseAgg',
                    'Fine Aggr.': 'FineAgg',
                    'Air Entrainment': 'AirEntrain',
                    'Compressive Strength (28-day)(Mpa)': 'Strength'},inplace=True)
con.head()

# Treat air entrainment as a categorical variable.
con['AirEntrain']=con['AirEntrain'].astype('category')
con.describe(include='category')
list(con.columns)

import seaborn as sns
sns.scatterplot(x="FlyAsh",y="Strength",data=con);

# Same scatter plot with a title and axis label (set_title was misspelled).
ax=sns.scatterplot(x="FlyAsh",y="Strength",data=con)
ax.set_title(" Concrete Strength vs. Fly ash")
ax.set_xlabel("Fly ash");

# Scatter plot with a fitted regression line, overall and then per
# air-entrainment category.
sns.lmplot(x="FlyAsh",y="Strength",data=con);
sns.lmplot(x="FlyAsh",y="Strength",hue="AirEntrain",data=con);

from scipy import stats
# Pearson correlation coefficient and its p-value.
stats.pearsonr(con['Strength'],con['FlyAsh'])

# Correlation matrix of the numeric columns; numeric_only=True skips the
# categorical 'AirEntrain' column, which pandas >= 2.0 would fail on.
cormat=con.corr(numeric_only=True)
round(cormat,2)
sns.heatmap(cormat);
SAMPLE OUTPUT:
EX. NO:06(d) APPLY AND EXPLORE VARIOUS PLOTTING FUNCTIONS
ON UCI DATA SETS USING HISTOGRAM
PROGRAM:
import pandas as pd
bank = pd.read_csv('Data/Bank.csv')

# Summary statistics of the Salary column.
bank['Salary'].mean()
sal = bank['Salary']
sal.min(), sal.mean(), sal.median(), sal.max()
bank.describe()

import seaborn as sns
# Histogram with seaborn's default binning.
sns.histplot(x=bank['Salary'])
# Notebook output echoed by the call above (not code):
# <AxesSubplot:xlabel='Salary', ylabel='Count'>

# 10 bins with a kernel density estimate overlaid.
sns.histplot(x=bank['Salary'], bins=10, kde=True);

# 10 bins normalised to probabilities instead of counts.
sns.histplot(x=bank['Salary'],
             bins=10, kde=False,
             stat="probability",
             color='green');

SAMPLE OUTPUT:

39.921923076923086

(26.7, 39.921923076923086, 37.0, 97.0)


EX. NO:06(e) APPLY AND EXPLORE VARIOUS PLOTTING FUNCTIONS
ON UCI DATA SETS USING THREE DIMENSIONAL PLOTTING
PROGRAM:
from mpl_toolkits import mplot3d
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
# Create the 3-D axes once; the repeated plt.axes(projection='3d') call
# stacked a second, redundant axes on the same figure.
ax = plt.axes(projection='3d')

# Data for a three-dimensional line: a helix around the z axis
zline = np.linspace(0, 15,1000)
xline = np.sin(zline)
yline = np.cos(zline)
ax.plot3D(xline, yline, zline, 'gray')

# Data for three-dimensional scattered points: random points near the helix,
# coloured by their height
zdata = 15 * np.random.random(100)
xdata = np.sin(zdata) + 0.1 * np.random.randn(100)
ydata = np.cos(zdata) + 0.1 * np.random.randn(100)
ax.scatter3D(xdata, ydata, zdata, c=zdata, cmap='Greens');
def f(x, y):
    """Return sin(sqrt(x**2 + y**2)), evaluated element-wise."""
    return np.sin(np.sqrt(x ** 2 + y ** 2))
# Build a 30x30 grid over [-6, 6] x [-6, 6] and evaluate f on it.
x = np.linspace(-6, 6, 30)
y = np.linspace(-6, 6, 30)
X, Y = np.meshgrid(x, y)
Z = f(X, Y)
# Draw the surface as 50 three-dimensional contour levels on a new figure.
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.contour3D(X, Y, Z, 50, cmap='binary')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z');
# Change the viewing angles of the 3-D axes.
ax.view_init(60, 35)
# Bare expression: displays the figure when run as the last line of a notebook cell.
fig
SAMPLE OUTPUT:
EX.NO:07 VISUALIZE GEOGRAPHIC DATA WITH BASE MAP

PROGRAM:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
# Orthographic projection centred on lat 50, lon -100, with the
# "blue marble" image as background.
plt.figure(figsize=(8, 8))
m = Basemap(projection='ortho', resolution=None, lat_0=50, lon_0=-100)
m.bluemarble(scale=0.5);
# Second map: 'lcc' projection, 8E6 wide and high, centred on lat 45,
# lon -100, with a semi-transparent etopo background.
fig = plt.figure(figsize=(8, 8))
m = Basemap(projection='lcc', resolution=None, width=8E6, height=8E6,
lat_0=45, lon_0=-100,)
m.etopo(scale=0.5, alpha=0.5)

# Map (long, lat) to (x, y) for plotting


x, y = m(-122.3, 47.6)
# Mark the converted point with a black dot and label it.
plt.plot(x, y, 'ok', markersize=5)
plt.text(x, y, ' Seattle', fontsize=12);
SAMPLE OUTPUT:

You might also like