Download as pptx, pdf, or txt
Download as pptx, pdf, or txt
You are on page 1 of 30

Ques. 5- Using the Iris data, plot the following with proper legend and axis labels:

(Download the Iris data from https://archive.ics.uci.edu/ml/datasets/iris or import it from sklearn datasets.)

a. Plot bar chart to show the frequency of each class label in the data.

b. Draw a scatter plot for petal width vs sepal width and fit a regression line.

c. Plot density distribution for feature petal length.

d. Use a pair plot to show pairwise bivariate distribution in the Iris Dataset.

e. Draw heatmap for the four numeric attributes

f. Compute mean, mode, median, standard deviation, confidence interval and standard error for each feature

g. Compute correlation coefficients between each pair of features and plot heatmap
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from scipy import stats
# Load the Iris dataset and flatten it into one DataFrame:
# the measurement columns first, the class code last under 'target'.
iris = load_iris()
features_and_target = np.column_stack((iris['data'], iris['target']))
iris_data = pd.DataFrame(
    features_and_target,
    columns=[*iris['feature_names'], 'target'],
)

# Set the plotting style
sns.set(style="whitegrid")
# a. Plot bar chart to show the frequency of each class label in the data

# value_counts() orders its result by frequency, not by class code, while
# iris['target_names'] is ordered by class code. Sorting the counts by their
# index (the class code) guarantees each bar is paired with the right label.
class_counts = iris_data['target'].value_counts().sort_index()
class_labels = iris['target_names']

plt.figure(figsize=(8, 6))
plt.bar(class_labels, class_counts)
plt.xlabel('Class Labels')
plt.ylabel('Frequency')
plt.title('Frequency of Each Class Label')
plt.show()
# b. Scatter plot of petal width against sepal width with a fitted regression line
fig, ax = plt.subplots(figsize=(8, 6))
sns.regplot(data=iris_data, x='sepal width (cm)', y='petal width (cm)', ax=ax)
ax.set_xlabel('Sepal Width (cm)')
ax.set_ylabel('Petal Width (cm)')
ax.set_title('Petal width vs Sepal width')
plt.show()
# c. Kernel density estimate of the petal length feature
fig, ax = plt.subplots(figsize=(8, 6))
sns.kdeplot(x='petal length (cm)', data=iris_data, fill=True, ax=ax)
ax.set_xlabel('Petal Length (cm)')
ax.set_ylabel('Density')
ax.set_title('Density Distribution of Petal Length')
plt.show()
# d. Pair plot of every pairwise feature combination, coloured by class code
pair_grid = sns.pairplot(data=iris_data, hue='target')
plt.show()

# e. Heatmap for the four numeric attributes (drawn from their correlation matrix)

# Everything except the class-code column
numeric_attributes = iris_data.drop(columns='target')
attribute_corr = numeric_attributes.corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(attribute_corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Heatmap of Correlation between Numeric Attributes')
plt.show()
# f. Compute mean, mode, median, standard deviation, confidence interval, and
# standard error for each feature

# Work on the measurement columns only; the last column of iris_data is the
# class code and must not be summarised as if it were a feature.
features = iris_data.iloc[:, :-1]

# Standard error of the mean of each feature (one value per column).
standard_error = np.asarray(stats.sem(features))

# 95% confidence interval for each feature mean, taken from the t-distribution
# with n - 1 degrees of freedom. The result is a (lower, upper) pair of arrays.
confidence_interval = stats.t.interval(
    0.95,
    len(features) - 1,
    loc=features.mean().to_numpy(),
    scale=standard_error,
)

# Assemble the table in one step (DataFrame.append no longer exists in
# pandas 2.x). Every entry is indexed by feature name so rows and columns
# cannot drift apart; the final transpose puts one statistic per row and one
# feature per column.
feature_stats = pd.DataFrame(
    {
        'mean': features.mean(),
        # DataFrame.mode() lists modes in ascending order; row 0 is the
        # smallest mode of each column when there are ties.
        'mode': features.mode().iloc[0],
        'median': features.median(),
        'std': features.std(),
        'confidence interval (95%) lower': pd.Series(confidence_interval[0], index=features.columns),
        'confidence interval (95%) upper': pd.Series(confidence_interval[1], index=features.columns),
        'standard error': pd.Series(standard_error, index=features.columns),
    }
).T

print("Feature Statistics:")
print(feature_stats)
# g. Pairwise correlation coefficients of the features, shown as a heatmap

# All columns except the trailing class-code column
feature_columns = iris_data.columns[:-1]
correlation_matrix = iris_data[feature_columns].corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(data=correlation_matrix, cmap='coolwarm', annot=True, ax=ax)
ax.set_title('Heatmap of Correlation Coefficients')
plt.show()
Ques. 6- Consider the following data frame containing a family name, gender of the family member and her/his
monthly income in each record.
Name Gender MonthlyIncome (Rs.)
Shah Male 114000.00
Vats Male 65000.00
Vats Female 43150.00
Kumar Female 69500.00
Vats Female 155000.00
Kumar Male 103000.00
Shah Male 55000.00
Shah Female 112400.00
Kumar Female 81030.00
Vats Male 71900.00
a. Calculate and display familywise gross monthly income.

b. Calculate and display the member with the highest monthly income.

c. Calculate and display monthly income of all members with income greater than Rs. 60000.00.

d. Calculate and display the average monthly income of the female members

import pandas as pd
# Build the income table: one (family name, gender, monthly income) record
# per family member, in the order given by the question.
_INCOME_COL = 'MonthlyIncome (Rs.)'
_records = [
    ('Shah', 'Male', 114000.00),
    ('Vats', 'Male', 65000.00),
    ('Vats', 'Female', 43150.00),
    ('Kumar', 'Female', 69500.00),
    ('Vats', 'Female', 155000.00),
    ('Kumar', 'Male', 103000.00),
    ('Shah', 'Male', 55000.00),
    ('Shah', 'Female', 112400.00),
    ('Kumar', 'Female', 81030.00),
    ('Vats', 'Male', 71900.00),
]
_names, _genders, _incomes = (list(column) for column in zip(*_records))
data = {'Name': _names, 'Gender': _genders, _INCOME_COL: _incomes}
df = pd.DataFrame(data)
_income = df[_INCOME_COL]

# a. Gross monthly income per family: sum the incomes within each family name.
family_gross_income = _income.groupby(df['Name']).sum()
print("Family-wise Gross Monthly Income:")
print(family_gross_income)
print()

# b. The full record of the member whose income is the largest.
_top_row_label = _income.idxmax()
highest_income_member = df.loc[_top_row_label]
print("Member with the Highest Monthly Income:")
print(highest_income_member)
print()

# c. Members earning strictly more than Rs. 60000.00; only the income column is shown.
_earns_over_60k = _income.gt(60000.00)
high_income_members = df.loc[_earns_over_60k]
print("Monthly Income of Members with Income Greater than Rs. 60000.00:")
print(high_income_members[_INCOME_COL])
print()

# d. Mean monthly income across the female members.
_is_female = df['Gender'].eq('Female')
average_female_income = _income.loc[_is_female].mean()
print("Average Monthly Income of Female Members:", average_female_income)


Ques. 7- Using the Titanic dataset, do the following:

a. Find total number of passengers with age less than 30

b. Find total fare paid by passengers of first class

c. Compare number of survivors of each passenger class

d. Compute descriptive statistics for any numeric attribute genderwise

import pandas as pd

# Read the Titanic passenger table from the CSV in the working directory
titanic_df = pd.read_csv('titanic.csv')

# a. Number of passengers younger than 30 (counted via non-null PassengerId values)
_is_under_30 = titanic_df['Age'] < 30
total_passengers_age_less_than_30 = titanic_df.loc[_is_under_30, 'PassengerId'].count()
print("Total number of passengers with age less than 30:", total_passengers_age_less_than_30)
print()

# b. Sum of the fares paid by first-class passengers (Pclass == 1)
_is_first_class = titanic_df['Pclass'] == 1
total_fare_first_class = titanic_df.loc[_is_first_class, 'Fare'].sum()
print("Total fare paid by passengers of first class:", total_fare_first_class)
print()

# c. Survivors per passenger class: the Survived column is summed within each Pclass
survivors_by_class = titanic_df['Survived'].groupby(titanic_df['Pclass']).sum()
print("Number of survivors by passenger class:")
print(survivors_by_class)
print()

# d. Descriptive statistics of one numeric attribute, split by gender.
# Swap in any other numeric column (e.g. 'Fare', 'SibSp', 'Parch') as needed.
numeric_attribute = 'Age'
descriptive_statistics_genderwise = (
    titanic_df[numeric_attribute].groupby(titanic_df['Sex']).describe()
)
print("Descriptive statistics for", numeric_attribute, "genderwise:")
print(descriptive_statistics_genderwise)

You might also like