Professional Documents
Culture Documents
Presentation 1
Presentation 1
5- Using Iris data, plot the following with proper legend and axis labels:
a. Plot bar chart to show the frequency of each class label in the data.
b. Draw a scatter plot for Petal width vs sepal width and fit a regression line
d. Use a pair plot to show pairwise bivariate distribution in the Iris Dataset.
f. Compute mean, mode, median, standard deviation, confidence interval and standard error for each feature
g. Compute correlation coefficients between each pair of features and plot heatmap
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from scipy import stats
# Load the Iris dataset
iris = load_iris()
iris_data = pd.DataFrame(data=np.c_[iris['data'], iris['target']], columns=iris['feature_names'] + ['target'])
the data
class_counts = iris_data['target'].value_counts()
class_labels = iris['target_names']
plt.figure(figsize=(8, 6))
plt.bar(class_labels, class_counts)
plt.xlabel('Class Labels')
plt.ylabel('Frequency')
plt.show()
# b. Draw a scatter plot for Petal width vs sepal width and fit a regression line
plt.figure(figsize=(8, 6))
plt.show()
# c. Plot density distribution for feature petal length
plt.figure(figsize=(8, 6))
plt.ylabel('Density')
plt.show()
# d. Use a pair plot to show pairwise bivariate distribution in the Iris Dataset
sns.pairplot(iris_data, hue='target')
plt.show()
plt.figure(figsize=(8, 6))
plt.show()
# f. Compute mean, mode, median, standard deviation, confidence interval, and standard error
loc=feature_stats.loc['mean'],
scale=stats.sem(iris_data.iloc[:, :-1]))
feature_stats = feature_stats.append(pd.Series(confidence_interval,
name='standard error'))
print("Feature Statistics:")
print(feature_stats)
# g. Compute correlation coefficients between each pair of features and plot heatmap
plt.figure(figsize=(8, 6))
plt.show()
Ques. 6- Consider the following data frame containing a family name, gender of the family member and her/his
monthly income in each record.
Name Gender MonthlyIncome (Rs.)
Shah Male 114000.00
Vats Male 65000.00
Vats Female 43150.00
Kumar Female 69500.00
Vats Female 155000.00
Kumar Male 103000.00
Shah Male 55000.00
Shah Female 112400.00
Kumar Female 81030.00
Vats Male 71900.00
a. Calculate and display familywise gross monthly income.
b. Calculate and display the member with the highest monthly income.
c. Calculate and display monthly income of all members with income greater than Rs. 60000.00.
d. Calculate and display the average monthly income of the female members
import pandas as pd
# Create the DataFrame
data = {
'Name': ['Shah', 'Vats', 'Vats', 'Kumar', 'Vats', 'Kumar', 'Shah', 'Shah', 'Kumar', 'Vats'],
'Gender': ['Male', 'Male', 'Female', 'Female', 'Female', 'Male', 'Male', 'Female', 'Female', 'Male'],
'MonthlyIncome (Rs.)': [114000.00, 65000.00, 43150.00, 69500.00, 155000.00, 103000.00,
55000.00, 112400.00, 81030.00, 71900.00]
}
df = pd.DataFrame(data)
# a. Calculate and display familywise gross monthly income.
print(family_gross_income)
print()
# b. Calculate and display the member with the highest monthly income.
print(highest_income_member)
print()
# c. Calculate and display monthly income of all members with income greater than Rs. 60000.00.
print(high_income_members['MonthlyIncome (Rs.)'])
print()
# d. Calculate and display the average monthly income of the female members.
import pandas as pd
titanic_df = pd.read_csv('titanic.csv')
# a. Find the total number of passengers with age less than 30
print()
# b. Find the total fare paid by passengers of first class
print()
# c. Compare the number of survivors of each passenger class
survivors_by_class = titanic_df.groupby('Pclass')['Survived'].sum()
print(survivors_by_class)
print()
# d. Compute descriptive statistics for any numeric attribute genderwise
numeric_attribute = 'Age' # You can choose any numeric attribute (e.g., 'Fare', 'SibSp', 'Parch', etc.)
descriptive_statistics_genderwise = titanic_df.groupby('Sex')[numeric_attribute].describe()
print(descriptive_statistics_genderwise)