Presentation 1

Ques. 5- Using the Iris data, plot the following with a proper legend and axis labels:
(Download IRIS data from:
https://archive.ics.uci.edu/ml/datasets/iris or import it from sklearn datasets)
a. Plot bar chart to show the frequency of each class label in the data.
b. Draw a scatter plot for Petal width vs sepal width and fit a regression line
c. Plot density distribution for feature petal length.
d. Use a pair plot to show pairwise bivariate distribution in the Iris Dataset.
e. Draw heatmap for the four numeric attributes
f. Compute mean, mode, median, standard deviation, confidence interval and standard error for each feature
g. Compute correlation coefficients between each pair of features and plot heatmap
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from scipy import stats
# Fetch the Iris dataset that ships with scikit-learn.
iris = load_iris()
# Put the measurements and the class label side by side in one table.
# The label shares a single array with the measurements here, so the
# 'target' column sits alongside them as the last column.
column_names = iris['feature_names'] + ['target']
iris_data = pd.DataFrame(
    np.column_stack((iris['data'], iris['target'])),
    columns=column_names,
)

# Apply seaborn's white-grid look to the plots drawn below.
sns.set(style="whitegrid")
# a. Plot bar chart to show the frequency of each class label in the data
# value_counts() orders its result by frequency rather than by label, so sort
# by the numeric label to guarantee each count lines up with the matching
# entry of iris['target_names'] (which is ordered by label 0, 1, 2).
class_counts = iris_data['target'].value_counts().sort_index()
class_labels = iris['target_names']
plt.figure(figsize=(8, 6))
plt.bar(class_labels, class_counts)
plt.xlabel('Class Labels')
plt.ylabel('Frequency')
plt.title('Frequency of Each Class Label')
plt.show()
# b. Scatter plot of petal width (y) against sepal width (x) with a fitted
#    regression line drawn by seaborn.
ax = sns.regplot(data=iris_data, x='sepal width (cm)', y='petal width (cm)')
# Label the axes object returned by regplot.
ax.set_xlabel('Sepal Width (cm)')
ax.set_ylabel('Petal Width (cm)')
ax.set_title('Petal width vs Sepal width')
plt.show()
# c. Kernel density estimate of the petal length feature, drawn as a filled curve.
ax = sns.kdeplot(x='petal length (cm)', data=iris_data, fill=True)
ax.set_xlabel('Petal Length (cm)')
ax.set_ylabel('Density')
ax.set_title('Density Distribution of Petal Length')
plt.show()
# d. Pair plot: one panel per pair of columns, with points coloured by the
#    value in the 'target' column.
sns.pairplot(data=iris_data, hue='target')
plt.show()
# e. Heatmap over the four numeric attributes.
# Drop the class label so only the measurement columns remain.
numeric_attributes = iris_data.drop(columns='target')
# The heatmap shows the pairwise correlations, with each cell annotated.
attribute_corr = numeric_attributes.corr()
ax = sns.heatmap(attribute_corr, annot=True, cmap='coolwarm')
ax.set_title('Heatmap of Correlation between Numeric Attributes')
plt.show()
# f. Compute mean, mode, median, standard deviation, confidence interval, and
#    standard error for each feature
# Work on the measurement columns only; the last column ('target') is the
# class label, not a feature, and must not appear in the statistics table.
features = iris_data.iloc[:, :-1]

# Mean, standard deviation and median (describe() reports the median as '50%').
feature_stats = features.describe().loc[['mean', 'std', '50%']]

# Standard error of the mean, one value per feature (scipy's default ddof=1).
standard_error = stats.sem(features.to_numpy())

# 95% confidence interval for each feature's mean, from Student's t
# distribution with n - 1 degrees of freedom. The result is a pair of arrays:
# (lower bounds, upper bounds).
confidence_interval = stats.t.interval(
    0.95,
    len(features) - 1,
    loc=features.mean().to_numpy(),
    scale=standard_error,
)
ci_lower, ci_upper = confidence_interval

# DataFrame.append no longer exists in pandas 2.x, so the remaining rows are
# collected into one frame and attached with a single pd.concat.
# DataFrame.mode() lists each column's modes in ascending order, so row 0 holds
# the smallest mode of every feature; this avoids indexing into
# scipy.stats.mode, whose result shape differs between SciPy versions.
extra_rows = pd.DataFrame(
    [features.mode().iloc[0].to_numpy(), ci_lower, ci_upper, standard_error],
    index=[
        'mode',
        'confidence interval (lower)',
        'confidence interval (upper)',
        'standard error',
    ],
    columns=features.columns,
)
feature_stats = pd.concat([feature_stats, extra_rows])
print("Feature Statistics:")
print(feature_stats)
# g. Pairwise correlation coefficients between the features, shown as an
#    annotated heatmap.
# Every column except the last one (the class label) is a feature.
feature_columns = iris_data.iloc[:, :-1]
correlation_matrix = feature_columns.corr()
ax = sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
ax.set_title('Heatmap of Correlation Coefficients')
plt.show()
Ques. 6- Consider the following data frame containing a family name, gender of the family member and her/his
monthly income in each record.
Name Gender MonthlyIncome (Rs.)
Shah Male 114000.00
Vats Male 65000.00
Vats Female 43150.00
Kumar Female 69500.00
Vats Female 155000.00
Kumar Male 103000.00
Shah Male 55000.00
Shah Female 112400.00
Kumar Female 81030.00
Vats Male 71900.00
a. Calculate and display familywise gross monthly income.
b. Calculate and display the member with the highest monthly income.
c. Calculate and display monthly income of all members with income greater than Rs. 60000.00.
d. Calculate and display the average monthly income of the female members
import pandas as pd
# Column holding each member's monthly income; referenced by every query below.
INCOME_COL = 'MonthlyIncome (Rs.)'


def _report(heading, value):
    """Print a heading, the value beneath it, and a trailing blank line."""
    print(heading)
    print(value)
    print()


# Build the table: one record per family member, in the order given in the question.
data = {
    'Name': [
        'Shah', 'Vats', 'Vats', 'Kumar', 'Vats',
        'Kumar', 'Shah', 'Shah', 'Kumar', 'Vats',
    ],
    'Gender': [
        'Male', 'Male', 'Female', 'Female', 'Female',
        'Male', 'Male', 'Female', 'Female', 'Male',
    ],
    INCOME_COL: [
        114000.00, 65000.00, 43150.00, 69500.00, 155000.00,
        103000.00, 55000.00, 112400.00, 81030.00, 71900.00,
    ],
}
df = pd.DataFrame(data)

# a. Gross monthly income per family: total the incomes within each family name.
income_by_family = df.groupby('Name')[INCOME_COL]
family_gross_income = income_by_family.sum()
_report("Family-wise Gross Monthly Income:", family_gross_income)

# b. The member with the highest monthly income: find the row label of the
#    largest income, then pull that whole record.
top_row_label = df[INCOME_COL].idxmax()
highest_income_member = df.loc[top_row_label]
_report("Member with the Highest Monthly Income:", highest_income_member)

# c. Monthly income of every member earning strictly more than Rs. 60000.00.
earns_over_60k = df[INCOME_COL].gt(60000.00)
high_income_members = df.loc[earns_over_60k]
_report(
    "Monthly Income of Members with Income Greater than Rs. 60000.00:",
    high_income_members[INCOME_COL],
)

# d. Average monthly income across the female members only.
is_female = df['Gender'].eq('Female')
average_female_income = df[is_female][INCOME_COL].mean()
print("Average Monthly Income of Female Members:", average_female_income)

Ques. 7- Using the Titanic dataset, do the following:
a. Find total number of passengers with age less than 30
b. Find total fare paid by passengers of first class
c. Compare number of survivors of each passenger class
d. Compute descriptive statistics for any numeric attribute genderwise
import pandas as pd
# Read the passenger records from 'titanic.csv' (path is relative to the
# working directory).
titanic_df = pd.read_csv('titanic.csv')

# a. Number of passengers younger than 30. Rows with a missing age fail the
#    comparison and are therefore left out; the tally counts non-null
#    PassengerId values among the remaining rows.
is_under_30 = titanic_df['Age'] < 30
total_passengers_age_less_than_30 = titanic_df.loc[is_under_30, 'PassengerId'].count()
print("Total number of passengers with age less than 30:", total_passengers_age_less_than_30)
print()

# b. Total fare paid by first-class passengers (Pclass equal to 1).
first_class_fares = titanic_df.loc[titanic_df['Pclass'] == 1, 'Fare']
total_fare_first_class = first_class_fares.sum()
print("Total fare paid by passengers of first class:", total_fare_first_class)
print()

# c. Survivors per passenger class: add up the 'Survived' column within each
#    class. NOTE(review): presumably 'Survived' is a 0/1 flag — confirm against the CSV.
survivors_by_class = titanic_df['Survived'].groupby(titanic_df['Pclass']).sum()
print("Number of survivors by passenger class:")
print(survivors_by_class)
print()

# d. Descriptive statistics of one numeric attribute, split by gender.
#    Any numeric column may be substituted (e.g. 'Fare', 'SibSp', 'Parch').
numeric_attribute = 'Age'
attribute_by_sex = titanic_df.groupby('Sex')[numeric_attribute]
descriptive_statistics_genderwise = attribute_by_sex.describe()
print("Descriptive statistics for", numeric_attribute, "genderwise:")
print(descriptive_statistics_genderwise)

Presentation 1

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Presentation 1

Uploaded by

Copyright:

Available Formats

Ques.

(Download IRIS data from:

https://archive.ics.uci.edu/ml/datasets/iris or import it from sklearn datasets)

c. Plot density distribution for feature petal length.

e. Draw heatmap for the four numeric attributes

# Set the plotting style

plt.title('Frequency of Each Class Label')

sns.regplot(x='sepal width (cm)', y='petal width (cm)', data=iris_data)

plt.xlabel('Sepal Width (cm)')

plt.ylabel('Petal Width (cm)')

plt.title('Petal width vs Sepal width')

sns.kdeplot(data=iris_data, x='petal length (cm)', fill=True)

plt.xlabel('Petal Length (cm)')

plt.title('Density Distribution of Petal Length')

# e. Draw heatmap for the four numeric attributes

numeric_attributes = iris_data.drop('target', axis=1)

sns.heatmap(numeric_attributes.corr(), annot=True, cmap='coolwarm')

plt.title('Heatmap of Correlation between Numeric Attributes')

for each feature

feature_stats = iris_data.describe().loc[['mean', 'std', '50%']]

feature_stats = feature_stats.append(pd.Series(stats.mode(iris_data.iloc[:, :-1])[0][0], name='mode'))

confidence_interval = stats.t.interval(0.95, len(iris_data.iloc[:, :-1])-1,

index=feature_stats.columns, name='confidence interval'))

standard_error = stats.sem(iris_data.iloc[:, :-1])

feature_stats = feature_stats.append(pd.Series(standard_error, index=feature_stats.columns,

correlation_matrix = iris_data.iloc[:, :-1].corr()

sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')

plt.title('Heatmap of Correlation Coefficients')

family_gross_income = df.groupby('Name')['MonthlyIncome (Rs.)'].sum()

print("Family-wise Gross Monthly Income:")

highest_income_member = df.loc[df['MonthlyIncome (Rs.)'].idxmax()]

print("Member with the Highest Monthly Income:")

high_income_members = df[df['MonthlyIncome (Rs.)'] > 60000.00]

print("Monthly Income of Members with Income Greater than Rs. 60000.00:")

average_female_income = df.loc[df['Gender'] == 'Female', 'MonthlyIncome (Rs.)'].mean()

print("Average Monthly Income of Female Members:", average_female_income)

a. Find total number of passengers with age less than 30

b. Find total fare paid by passengers of first class

c. Compare number of survivors of each passenger class

d. Compute descriptive statistics for any numeric attribute genderwise

# Load the Titanic dataset

total_passengers_age_less_than_30 = titanic_df[titanic_df['Age'] < 30]['PassengerId'].count()

print("Total number of passengers with age less than 30:", total_passengers_age_less_than_30)

total_fare_first_class = titanic_df[titanic_df['Pclass'] == 1]['Fare'].sum()

print("Total fare paid by passengers of first class:", total_fare_first_class)

print("Number of survivors by passenger class:")

print("Descriptive statistics for", numeric_attribute, "genderwise:")

You might also like