Download as pdf or txt
Download as pdf or txt
You are on page 1of 6

#QUESTION 1

import numpy as np
# Build a 2x3 matrix of uniform random samples and report per-row statistics.
ARR1 = np.random.rand(2, 3)
print(ARR1)
# axis=1 reduces across the columns, so each call yields one value per row.
print("MEAN:", ARR1.mean(axis=1))
print("STANDAR DEV:", ARR1.std(axis=1))
print("VARIANCE:", ARR1.var(axis=1))
#------------------------------------------------------------------------------------------------
import numpy as np
# Read the matrix dimensions from the user.
# int() replaces the original eval(): eval() would execute any expression
# typed at the prompt, whereas int() only accepts an integer literal.
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))

# Random integers in [1, 100) arranged as an a x b matrix.
Arr = np.random.randint(1, 100, (a, b))
print(Arr)
print(Arr.shape)
print(type(Arr))
print(Arr.dtype)

# Reinterpret the same a*b values as a b x a matrix (a reshape, not a transpose).
r = np.reshape(Arr, (b, a))
print("Array After reshape:\n", r)

#------------------------------------------------------------------------------------------------
import numpy as np
# Sample vector containing zeros, non-zeros and one NaN.
a = np.array([0, 2, 3, 0, 4, 5, np.nan])

# Each call prints a tuple holding the index array of the matching elements.
print(np.nonzero(a == 0))      # positions of the zeros
print(np.nonzero(a != 0))      # positions of everything else (NaN != 0 is True)
print(np.nonzero(np.isnan(a)))  # positions of the NaN entries
#------------------------------------------------------------------------------------------------

import numpy as np
# Three independent vectors of six random integers drawn from [1, 10).
Array1 = np.random.randint(1, 10, size=6)
Array2 = np.random.randint(1, 10, size=6)
Array3 = np.random.randint(1, 10, size=6)
print("Array1 = ", Array1)
print("Array2 = ", Array2)
print("Array3 = ", Array3)

# Array4 is the element-wise difference of Array2 and Array3.
Array4 = np.subtract(Array2, Array3)
print("Array4 = ", Array4)

# Array5 is Array1 doubled.
Array5 = 2 * Array1
print("Array5 = ", Array5)

# np.cov / np.corrcoef each return a 2x2 matrix for a pair of 1-D inputs.
print("Covariance of Array1 and Array4=\n", np.cov(Array1, Array4))
print("Covariance of Array1 and Array5=\n", np.cov(Array1, Array5))
print("Corealation of Array1 and Array4=\n", np.corrcoef(Array1, Array4))
print("Corealation of Array1 and Array5=\n", np.corrcoef(Array1, Array5))
#-------------------------------------------------------------------------------------------------
import numpy as np
# Two vectors of ten random integers drawn from [1, 10).
Array1 = np.random.randint(1, 10, size=10)
Array2 = np.random.randint(1, 10, size=10)

# Element-wise sum of the first halves and product of the second halves.
print("Sum is:", Array1[:5] + Array2[:5])
print("Product is:", Array1[5:] * Array2[5:])
#--------------------------------------------------------------------------------------------------

#a = np.array([[4,3, 1],[5 ,7, 0],[9, 9, 3],[8, 2, 4]])


# Read the matrix dimensions from the user.
# int() replaces the original eval(): eval() would execute any expression
# typed at the prompt, whereas int() only accepts an integer literal.
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))

# Random integers in [1, 100) arranged as an a x b matrix.
Arr = np.random.randint(1, 100, (a, b))
print(Arr)

# Indices of the two rows to exchange.
c = int(input("Enter the rows1 to interchange:"))
d = int(input("Enter the row2 to interchange:"))

# Fancy indexing on the right-hand side copies both rows first, so the
# assignment swaps them without needing a temporary variable.
Arr[[c, d], :] = Arr[[d, c], :]
print("Array After swapping")
print(Arr)

c1 = int(input("Column No. to flip"))

# Reverse the chosen column top-to-bottom, leaving the other columns as they are.
Arr[:, c1] = np.flip(Arr[:, c1])
print("Array After reversing column")
print(Arr)

#Question 3
import pandas as pd
import numpy as np
# Build a 50x3 frame of standard-normal samples.
a = pd.DataFrame(np.random.randn(50, 3), columns=['A', 'B', 'C'])
print(a)

# Number of cells to blank out: 10% of all cells.
null_val = int(0.1 * a.size)
print(null_val)

# replace=False guarantees null_val *distinct* flat positions; sampling with
# replacement (the default) can pick the same cell twice and leave fewer
# than 10% of the cells null.
ind_null_val = np.random.choice(a.size, null_val, replace=False)

# Mark the chosen cells in an explicit boolean mask and let pandas insert the
# NaNs, instead of writing through a.values, which only works when .values
# happens to be a writable view of the frame's data.
null_mask = np.zeros(a.shape, dtype=bool)
null_mask.flat[ind_null_val] = True
a = a.mask(null_mask)
print(a)

# Keep only the columns that have at least 45 non-null values.
col = a.dropna(thresh=45, axis=1)
print(col)

# Total number of missing cells across the whole frame.
print("No. of missing values:", a.isnull().sum().sum())

# Rows ordered by column A.
print(a.sort_values(by=['A']))

# Drop rows whose value in column A repeats an earlier row.
print(a.drop_duplicates("A"))

# Covariance and correlation between the first two columns (A and B).
# The second line previously computed the covariance of B and C.
print("Covariance of Column1 and Column2=\n", a['A'].cov(a['B']))
print("Corelation of Column1 and Column2=\n", a['A'].corr(a['B']))

# Discretise column B into 5 equal-width bins.
print(pd.cut(a['B'], 5, precision=2))
#Question 7
# Family income records. The string literals that had been split across
# lines by hard wrapping are restored to single-line literals.
data = {
    "Family Name": ['Shah', 'Vats', 'Vats', 'Kumar', 'Vats',
                    'Kumar', 'Shah', 'Shah', 'Kumar', 'Vats'],
    "Gender": ['Male', 'Male', 'Female', 'Female', 'Female',
               'Male', 'Male', 'Female', 'Female', 'Male'],
    "Income": [44000, 65000, 43150, 66500, 255000,
               103000, 55000, 112400, 81030, 71900],
}
df = pd.DataFrame(data)
print(df)

# Total income per family.
print(df.groupby('Family Name')['Income'].sum())

# Highest and lowest income per family.
print(df.groupby('Family Name')['Income'].agg(['max', 'min']))

# Members earning less than 80000.
print(df[df['Income'] < 80000])

# Average income of the female members. Previously Avg_income was just an
# alias of the filtered frame and no average was ever computed.
females = df[df['Gender'] == 'Female']
Avg_income = females['Income'].mean()
print("Average income of females:", Avg_income)

# Drop every row whose income is below the overall mean income.
df2 = df.drop(df[df['Income'] < df['Income'].mean()].index)
print(df2)

#IRIS PRACTICAL
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

a = pd.read_excel(r"C:\Users\HP\Downloads\iris.xlsx")

# a. Display data types and info on the dataset
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() adds a stray "None" line.
a.info()

# b. Find number of missing values in each column
missing_values = a.isnull().sum()
print("Missing Values:\n", missing_values)

# c. Plot bar chart for frequency of each class label
# NOTE(review): the class-label column name is not visible in this script.
# It is assumed to be the first non-numeric column, falling back to the last
# column when every column is numeric - confirm against the workbook.
label_columns = a.select_dtypes(exclude='number').columns
class_column = label_columns[0] if len(label_columns) else a.columns[-1]

# Count the rows per class; plotting the whole frame would instead draw one
# bar group per row of measurements.
class_frequency = a[class_column].value_counts()

plt.figure(figsize=(6, 4))
# Draw on the figure created above rather than letting pandas open a new one.
class_frequency.plot.bar(ax=plt.gca())
plt.xlabel('Class Label')
plt.ylabel('Frequency')
plt.title('Frequency of Each Class Label')
plt.show()

# d. Scatter plot for Petal Length vs Sepal Length with regression line
plt.figure(figsize=(8, 6))
sns.regplot(x='Sepal_length', y='Petal_length', data=a)
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')
plt.title('Scatter plot: Petal Length vs Sepal Length with Regression Line')
plt.show()

# e. Density distribution for feature Petal Width
plt.figure(figsize=(8, 6))
# fill=True is the current spelling of the deprecated shade=True option.
sns.kdeplot(data=a['Petal_width'], fill=True)
plt.xlabel('Petal Width')
plt.ylabel('Density')
plt.title('Density Distribution of Petal Width')
plt.show()

# f. Pair plot for pairwise bivariate distribution
sns.pairplot(a)
plt.show()

# g. Heatmap for any two numeric attributes (e.g., Sepal Length and Petal Width)
numeric_attributes = ['Sepal_length', 'Petal_width']
sns.heatmap(a[numeric_attributes].corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Numeric Attributes')
plt.show()

# h. Compute statistics for each numeric feature
# numeric_only=True restricts each reduction to the numeric columns, so a
# text column in the frame (e.g. a class label) cannot break the computation.
statistics = a.describe()
mode = a.mode(numeric_only=True).iloc[0]  # first mode when several values tie
std_error = a.sem(numeric_only=True)
# 1.96 * std / sqrt(n): the half-width of a normal-approximation 95% interval.
confidence_interval = 1.96 * (a.std(numeric_only=True) / (len(a) ** 0.5))

# Print computed statistics
print("Statistics for each numeric feature:\n", statistics)
print("\nMode for each numeric feature:\n", mode)
print("\nStandard Error for each numeric feature:\n", std_error)
print("\nConfidence Interval for each numeric feature:\n",
      confidence_interval)

# i. Compute correlation coefficients between each pair of features and plot heatmap
correlation_matrix = a.corr(numeric_only=True)
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Iris Dataset')
plt.show()

#TITANIC PRACTICAL
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
a = pd.read_excel(r"C:\Users\HP\Downloads\titanic.xlsx")
# Work on an independent copy so the cleaning step leaves `a` untouched.
df = a.copy(deep=True)

# a. Clean the data by dropping the column with the largest number of missing values
missing_values = a.isnull().sum()
column_to_drop = missing_values.idxmax()
df.drop(column_to_drop, axis=1, inplace=True)
print(df)

# b. Find the total number of passengers with age more than 30
passengers_over_30 = a[a['Age'] > 30]
total_passengers_over_30 = len(passengers_over_30)
print("No. of passengers over 30 :", total_passengers_over_30)

# c. Find the total fare paid by passengers of the second class
total_fare_second_class = a[a['Pclass'] == 2]['Fare'].sum()
print("Total fare of second class : ", total_fare_second_class)

# d. Compare the number of survivors of each passenger class
# NOTE(review): summing 'Survived' counts survivors only if the column is
# coded 1 = survived / 0 = not - confirm against the workbook.
survivors_per_class = a.groupby('Pclass')['Survived'].sum()
print("No. of Survivors of each class\n", survivors_per_class)

# e. Compute descriptive statistics for age attribute gender-wise
descriptive_stats_age_gender = a.groupby('Sex')['Age'].describe()
# The message literal had been split across two lines by hard wrapping.
print("Descriptive statistics for age attribute gender wise\n",
      descriptive_stats_age_gender)

# f. Draw a scatter plot for passenger fare paid by Female and Male passengers separately
plt.figure(figsize=(8, 6))
sns.scatterplot(data=a, x='Fare', y='Sex', hue='Sex')
plt.title('Scatter plot of Fare Paid by Gender')
plt.xlabel('Fare')
plt.ylabel('Gender')
plt.show()

# g. Compare density distribution for features age and passenger fare
plt.figure(figsize=(10, 6))
# fill=True is the current spelling of the deprecated shade=True option.
sns.kdeplot(data=a['Age'], label='Age', fill=True)
sns.kdeplot(data=a['Fare'], label='Fare', fill=True)
plt.title('Density Distribution of Age and Fare')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.show()

# h. Draw a pie chart for three passenger classes
class_counts = a['Pclass'].value_counts()
# value_counts() orders classes by frequency, so the labels are derived from
# its index instead of hard-coding an order that only fits one dataset.
class_labels = ['Class {}'.format(pclass) for pclass in class_counts.index]
plt.figure(figsize=(6, 6))
plt.pie(class_counts, labels=class_labels,
        autopct='%1.1f%%', colors=['skyblue', 'lightgreen', 'lightcoral'])
plt.title('Passenger Class Distribution')
plt.show()

# i. Find % of survived passengers for each class and analyze
# NOTE(review): the mean is a survival rate only if 'Survived' is coded
# 1 = survived / 0 = not - confirm against the workbook.
survived_per_class = a.groupby('Pclass')['Survived'].mean() * 100
print("% of survived passengers for each class\n", survived_per_class)
#question4
import pandas as pd
# Load the two daily sheets and echo each one.
a = pd.read_excel(r"C:\Users\HP\Documents\Day1.xlsx")
print(a)
b = pd.read_excel(r"C:\Users\HP\Documents\Day2.xlsx")
print(b)

# Part c: stack both days, then count the rows left after keeping the first
# occurrence of each name.
print("c")
f4 = pd.concat((a, b))
f5 = f4.drop_duplicates(subset=["name"])
print(f5.shape[0])

# Part d: summary statistics of the stacked data indexed by (name, Date).
print("d")
index = f4.set_index(['name', 'Date'])
print(index.describe())

# Part a: rows whose name appears in both sheets (inner join on name).
print("a")
c = a.merge(b, on='name')
print(c)

# Part b: rows of the stacked data whose name is absent from the join above.
print("b")
in_both_days = f4['name'].isin(c['name'])
print(f4[~in_both_days])

You might also like