Print Print Print Print: Import As

# QUESTION 1
import numpy as np

# 2x3 array of random floats; every statistic below is taken along axis 1,
# which yields one value per row.
ARR1 = np.random.rand(2, 3)
print(ARR1)

row_mean = ARR1.mean(axis=1)
row_std = ARR1.std(axis=1)
row_var = ARR1.var(axis=1)

print("MEAN:", row_mean)
print("STANDAR DEV:", row_std)
print("VARIANCE:", row_var)
#------------------------------------------------------------------------------------------------
import numpy as np

# Read the array dimensions as plain integers. int() is used instead of
# eval() so that text typed at the prompt is parsed as a number and never
# executed as Python code.
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))

# a x b array of random integers drawn from the range starting at 1 and
# bounded by 100.
Arr = np.random.randint(1, 100, (a, b))
print(Arr)
print(Arr.shape)
print(type(Arr))
print(Arr.dtype)

# Lay the same a*b elements out as b rows by a columns.
r = np.reshape(Arr, (b, a))
print("Array After reshape:\n", r)
#------------------------------------------------------------------------------------------------
import numpy as np

# Sample data containing zeros, non-zero values and one NaN.
a = np.array([0, 2, 3, 0, 4, 5, np.nan])

# Build each boolean mask once, then report the matching index positions.
is_zero = a == 0
is_nonzero = a != 0
is_missing = np.isnan(a)

print(np.where(is_zero))      # indices of zero elements
print(np.where(is_nonzero))   # indices of non-zero elements (NaN counts as non-zero)
print(np.where(is_missing))   # indices of NaN elements
#------------------------------------------------------------------------------------------------
import numpy as np

# Three random integer arrays of the same length. Array2 and Array3 were
# previously used without ever being created (NameError); they are generated
# here the same way as Array1.
Array1 = np.random.randint(1, 10, 6)
Array2 = np.random.randint(1, 10, 6)
Array3 = np.random.randint(1, 10, 6)
print("Array1 = ", Array1)
print("Array2 = ", Array2)
print("Array3 = ", Array3)

# Derived arrays: element-wise difference of Array2 and Array3, and Array1
# with every element doubled.
Array4 = Array2 - Array3
Array5 = Array1 * 2

# np.cov / np.corrcoef each return a 2x2 matrix for the pair of inputs.
print("Covariance of Array1 and Array4=\n", np.cov(Array1, Array4))
print("Covariance of Array1 and Array5=\n", np.cov(Array1, Array5))
print("Corealation of Array1 and Array4=\n", np.corrcoef(Array1, Array4))
print("Corealation of Array1 and Array5=\n", np.corrcoef(Array1, Array5))
#------------------------------------------------------------------------------------------------
# Element-wise sum of the first five elements and element-wise product of the
# elements from index 5 onward. NOTE: with 6-element arrays the slice [5:10]
# selects only the last element.
print("Sum is:", np.add(Array1[:5], Array2[:5]))
print("Product is:", np.multiply(Array1[5:10], Array2[5:10]))
#------------------------------------------------------------------------------------------------
# Sample array literal kept for reference (not used by the code below):
# a = np.array([[4,3, 1],[5 ,7, 0],[9, 9, 3],[8, 2, 4]])

# Dimensions and row indices are parsed with int() rather than eval(), so the
# typed text is never executed; this matches how the column number is read
# further down.
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))
Arr = np.random.randint(1, 100, (a, b))
print(Arr)

c = int(input("Enter the rows1 to interchange:"))
d = int(input("Enter the row2 to interchange:"))

# Swap rows c and d in place: the right-hand side is evaluated first, so both
# rows are read before either one is overwritten.
Arr[[c, d], :] = Arr[[d, c], :]
print("Array After swapping")
print(Arr)

c1 = int(input("Column No. to flip"))

# Reverse the order of the values in column c1 (top becomes bottom).
Arr[:, c1] = np.flip(Arr[:, c1])
print("Array After reversing column")
print(Arr)
# Question 3
import pandas as pd
import numpy as np

# 50x3 frame of normally distributed random values.
a = pd.DataFrame(np.random.randn(50, 3), columns=['A', 'B', 'C'])
print(a)

# Blank out 10% of all cells.
null_val = int(0.1 * a.size)
print(null_val)
# replace=False guarantees distinct flat positions, so exactly null_val cells
# become NaN (sampling with replacement could pick the same cell twice).
ind_null_val = np.random.choice(a.size, null_val, replace=False)
# Write into an explicit copy of the data and rebuild the frame; assigning
# through a.values depends on it being a writable view of the frame, which is
# not the case when pandas Copy-on-Write is active.
cells = a.to_numpy(copy=True)
cells.flat[ind_null_val] = np.nan
a = pd.DataFrame(cells, index=a.index, columns=a.columns)
print(a)
#------------------------------------------------------------------------------------------------
# Keep only the columns that have at least 45 non-missing values.
col = a.dropna(thresh=45, axis=1)
print(col)
#------------------------------------------------------------------------------------------------
# Total count of missing cells across the whole frame.
print("No. of missing values:", a.isnull().sum().sum())
#------------------------------------------------------------------------------------------------
# Rows ordered by column A.
print(a.sort_values(by=['A']))
#------------------------------------------------------------------------------------------------
# Rows with a repeated value in column A removed (first occurrence kept).
print(a.drop_duplicates("A"))
#------------------------------------------------------------------------------------------------
print("Covariance of Column1 and Column2=\n", a['A'].cov(a['B']))
# The label announces a correlation of the first two columns, so compute
# exactly that (this line previously printed the covariance of B and C).
print("Corelation of Column1 and Column2=\n", a['A'].corr(a['B']))
#------------------------------------------------------------------------------------------------
# Discretise column B into 5 equal-width bins.
print(pd.cut(a['B'], 5, precision=2))
#------------------------------------------------------------------------------------------------
# Question 7
# Family / gender / income records. The string literals below were broken
# across lines (a syntax error) and are restored to single-line values.
data = {
    "Family Name": ['Shah', 'Vats', 'Vats', 'Kumar', 'Vats',
                    'Kumar', 'Shah', 'Shah', 'Kumar', 'Vats'],
    "Gender": ['Male', 'Male', 'Female', 'Female', 'Female',
               'Male', 'Male', 'Female', 'Female', 'Male'],
    "Income": [44000, 65000, 43150, 66500, 255000,
               103000, 55000, 112400, 81030, 71900],
}
df = pd.DataFrame(data)
print(df)

# Total income per family.
print(df.groupby('Family Name')['Income'].sum())
# Highest and lowest income per family.
print(df.groupby('Family Name')['Income'].agg(['max', 'min']))
# Members whose income is below 80000.
print(df[df['Income'] < 80000])

# Average income of the female members. Avg_income previously just aliased
# the filtered frame; it now holds the mean of its Income column.
females = df[df['Gender'] == 'Female']
Avg_income = females['Income'].mean()
print("Average income of females:", Avg_income)

# Drop every row whose income is below the overall mean income.
df2 = df.drop(df[df['Income'] < df['Income'].mean()].index)
print(df2)
# IRIS PRACTICAL
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

a = pd.read_excel(r"C:\Users\HP\Downloads\iris.xlsx")

# a. Display data types and info on the dataset
# info() prints its report itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
a.info()

# b. Find number of missing values in each column
missing_values = a.isnull().sum()
print("Missing Values:\n", missing_values)

# c. Plot bar chart for frequency of each class label
# NOTE(review): assumes the class label is the first non-numeric column and
# falls back to the last column if every column is numeric - confirm against
# the actual spreadsheet.
label_columns = a.select_dtypes(exclude="number").columns
class_column = label_columns[0] if len(label_columns) else a.columns[-1]
class_counts = a[class_column].value_counts()
fig, ax = plt.subplots(figsize=(6, 4))
class_counts.plot.bar(ax=ax)
ax.set_xlabel('Class Label')
ax.set_ylabel('Frequency')
ax.set_title('Frequency of Each Class Label')
plt.show()

# d. Scatter plot for Petal Length vs Sepal Length with regression line
sns.regplot(x='Sepal_length', y='Petal_length', data=a)
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')
plt.title('Scatter plot: Petal Length vs Sepal Length with Regression Line')
plt.show()

# e. Density distribution for feature Petal Width
# fill=True replaces the deprecated shade=True keyword.
sns.kdeplot(data=a['Petal_width'], fill=True)
plt.xlabel('Petal Width')
plt.ylabel('Density')
plt.title('Density Distribution of Petal Width')
plt.show()

# f. Pair plot for pairwise bivariate distribution
sns.pairplot(a)
plt.show()

# g. Heatmap for any two numeric attributes (e.g., Sepal Length and Petal Width)
numeric_attributes = ['Sepal_length', 'Petal_width']
sns.heatmap(a[numeric_attributes].corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Numeric Attributes')
plt.show()

# h. Compute statistics for each numeric feature
# Restrict to numeric columns first: sem(), std() and corr() raise on a
# text column in current pandas versions.
numeric = a.select_dtypes(include="number")
statistics = numeric.describe()
mode = numeric.mode().iloc[0]
std_error = numeric.sem()
# Half-width of an interval using the 1.96 multiplier.
confidence_interval = 1.96 * (numeric.std() / (len(numeric) ** 0.5))

# Print computed statistics
print("Statistics for each numeric feature:\n", statistics)
print("\nMode for each numeric feature:\n", mode)
print("\nStandard Error for each numeric feature:\n", std_error)
print("\nConfidence Interval for each numeric feature:\n",
      confidence_interval)

# i. Compute correlation coefficients between each pair of features and plot heatmap
correlation_matrix = numeric.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Iris Dataset')
plt.show()
# TITANIC PRACTICAL
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

a = pd.read_excel(r"C:\Users\HP\Downloads\titanic.xlsx")
# Work on a copy so the column drop in part (a) leaves `a` intact for the
# remaining parts.
df = a.copy(deep=True)

# a. Clean the data by dropping the column with the largest number of missing values
missing_values = a.isnull().sum()
column_to_drop = missing_values.idxmax()
df.drop(column_to_drop, axis=1, inplace=True)
print(df)

# b. Find the total number of passengers with age more than 30
passengers_over_30 = a[a['Age'] > 30]
total_passengers_over_30 = len(passengers_over_30)
print("No. of passengers over 30 :", total_passengers_over_30)

# c. Find the total fare paid by passengers of the second class
total_fare_second_class = a[a['Pclass'] == 2]['Fare'].sum()
print("Total fare of second class : ", total_fare_second_class)

# d. Compare the number of survivors of each passenger class
survivors_per_class = a.groupby('Pclass')['Survived'].sum()
print("No. of Survivors of each class\n", survivors_per_class)

# e. Compute descriptive statistics for age attribute gender-wise
descriptive_stats_age_gender = a.groupby('Sex')['Age'].describe()
print("Descriptive statistics for age attribute gender wise\n",
      descriptive_stats_age_gender)

# f. Draw a scatter plot for passenger fare paid by Female and Male passengers separately
sns.scatterplot(data=a, x='Fare', y='Sex', hue='Sex')
plt.title('Scatter plot of Fare Paid by Gender')
plt.xlabel('Fare')
plt.ylabel('Gender')
plt.show()

# g. Compare density distribution for features age and passenger fare
# fill=True replaces the deprecated shade=True keyword.
sns.kdeplot(data=a['Age'], label='Age', fill=True)
sns.kdeplot(data=a['Fare'], label='Fare', fill=True)
plt.title('Density Distribution of Age and Fare')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.show()

# h. Draw a pie chart for three passenger classes
class_counts = a['Pclass'].value_counts()
# Build the labels from the counted index so each wedge is always named after
# the class it actually represents, whatever order value_counts() returns.
class_labels = [f"Class {pclass}" for pclass in class_counts.index]
plt.pie(class_counts, labels=class_labels,
        autopct='%1.1f%%', colors=['skyblue', 'lightgreen', 'lightcoral'])
plt.title('Passenger Class Distribution')
plt.show()

# i. Find % of survived passengers for each class and analyze
survived_per_class = a.groupby('Pclass')['Survived'].mean() * 100
print("% of survived passengers for each class\n", survived_per_class)
# Question 4
import pandas as pd

# Load the two daily sheets and echo each one.
day1 = pd.read_excel(r"C:\Users\HP\Documents\Day1.xlsx")
print(day1)
day2 = pd.read_excel(r"C:\Users\HP\Documents\Day2.xlsx")
print(day2)

# Part c: stack both days and count the distinct names.
print("c")
both_days = pd.concat([day1, day2])
distinct_names = both_days.drop_duplicates(subset=["name"])
print(distinct_names.shape[0])

# Part d: index the stacked rows by (name, Date) and summarise them.
print("d")
by_name_and_date = both_days.set_index(['name', 'Date'])
print(by_name_and_date.describe())

# Part a: rows whose name appears in both sheets (inner join on name).
print("a")
in_both = day1.merge(day2, on='name')
print(in_both)

# Part b: stacked rows whose name is absent from the joined result.
print("b")
name_in_both = both_days['name'].isin(in_both['name'])
print(both_days[~name_in_both])

Print Print Print Print: Import As

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Print Print Print Print: Import As

Uploaded by

Copyright:

Available Formats

#QUESTION 1

#a = np.array([[4,3, 1],[5 ,7, 0],[9, 9, 3],[8, 2, 4]])

c1=int(input("Column No. to flip"))

# a. Display data types and info on the dataset

# b. Find number of missing values in each column

# c. Plot bar chart for frequency of each class label

# e. Density distribution for feature Petal Width

# f. Pair plot for pairwise bivariate distribution

# h. Compute statistics for each numeric feature

# Print computed statistics

# i. Compute correlation coefficients between each pair of features and plot

# b. Find the total number of passengers with age more than 30

# c. Find the total fare paid by passengers of the second class

# d. Compare the number of survivors of each passenger class

# e. Compute descriptive statistics for age attribute gender-wise

# g. Compare density distribution for features age and passenger fare

# h. Draw a pie chart for three passenger classes

# i. Find % of survived passengers for each class and analyze

You might also like