DS Programs

Assignment 1:
SET A:
1)
import pandas as pd

# Build the frame in one constructor call instead of growing an empty frame
# with df.loc[i] = ...: an empty DataFrame(columns=...) starts with
# object-dtype columns and every new label forces an enlargement. Passing the
# rows up front lets pandas infer integer dtypes for Age and Percentage.
df = pd.DataFrame(
    [
        ['MJP', 19, 85],
        ['MYG', 20, 84],
        ['JJK', 18, 90],
        ['KTH', 21, 84],
        ['PJM', 19, 78],
        ['KNJ', 22, 99],
        ['KSJ', 23, 63],
        ['JHS', 24, 79],
    ],
    columns=['Name', 'Age', 'Percentage'],
)
df
2)
# (rows, columns) of the frame.
df.shape
# dtype of every column.
df.dtypes
# info is a method: without the call parentheses the expression only yields
# the bound-method object and no summary is produced.
df.info()
3)
# Descriptive statistics of the frame's columns.
df.describe()
4)
# Append five extra rows (some with missing values). Each row needs its own
# index label: writing all of them to label 10 overwrites the previous row and
# leaves only the last one in the frame. Labels 8-12 continue after the
# existing rows 0-7.
df.loc[8] = ['BBK', 29, None]
df.loc[9] = ['PCY', 29, 95]
df.loc[10] = ['SJP', 29, 95]
df.loc[11] = ['SJP', None, None]
df.loc[12] = [None, 30, 96]
# New column with no values yet.
df["Remarks"] = None
df
5)
# (rows, columns) after the extra rows/column were added.
df.shape
# Boolean mask: True where a cell is missing.
df.isnull()
# Boolean Series: True for rows that repeat an earlier row.
df.duplicated()
6)
# Remove the 'Remarks' column in place. `columns=` already selects the axis,
# so the extra axis=1 argument was redundant.
df.drop(columns='Remarks', inplace=True)
df
# dropna() returns a new frame and leaves df untouched; rebind df so the rows
# with missing values are actually removed rather than only displayed.
df = df.dropna()
df
# Plot Percentage against Name with the DataFrame's default plot kind.
7) df.plot(x="Name",y="Percentage")
8) import matplotlib.pyplot as plt
# Scatter plot with Percentage on the x-axis and Age on the y-axis.
plt.scatter(df['Percentage'],df['Age'])
SET B:
# NOTE(review): iris_uploaded is not assigned anywhere before this point in
# the file; it is only created later from google.colab's files.upload(). That
# upload must run first and must contain 'SOCR-HeightWeight.csv'.
1) import io
# Parse the uploaded file's bytes as CSV.
df = pd.read_csv(io.BytesIO(iris_uploaded['SOCR-HeightWeight.csv']))
# First 10 rows, last 10 rows, and 20 randomly sampled rows.
df.head(10)
df.tail(10)
df.sample(20)
# Dimensions, total number of cells, and per-column dtypes.
2) df.shape
df.size
df.dtypes
# Descriptive statistics of the frame's columns.
3) df.describe()
4) df.shape
# True if at least one cell in the frame is missing.
df.isnull().values.any()
# Cell-by-cell boolean mask of missing values.
df.isnull()
5)
import numpy as np
import pandas as pd
# The columns are in pounds and inches, so the imperial BMI formula applies:
# BMI = 703 * weight(lb) / height(in)^2. Without the 703 factor the values are
# not on the usual BMI (kg/m^2) scale.
df['BMI'] = 703 * df['Weight(Pounds)'] / (df['Height(Inches)'] ** 2)
df
# Largest and smallest value of the BMI column.
6) np.max(df['BMI'])
np.min(df['BMI'])
7)
import matplotlib.pyplot as plt
# Scatter plot with height on the x-axis and weight on the y-axis.
df.plot.scatter(x='Height(Inches)',y='Weight(Pounds)')
Assignment 2:
SET A:
1)
import numpy as np

# 2x2 integer array whose overall extremes are reported below.
flat = np.array([[0, 1], [2, 3]])
print("Original flattened array is:\n", flat)
# ndarray.max()/min() with no axis reduce over every element.
print("Maximum value of above flattened array:\n", flat.max())
print("Minimum value of above flattened array:\n", flat.min())
2)
# The two "points" are the scalar maximum and minimum of `flat`.
pointA = np.max(flat)
pointB = np.min(flat)
# The call was split in the middle of `np.linalg.norm` by line wrapping, which
# is a syntax error; it is rejoined here. The norm of the difference is the
# Euclidean distance.
print(
    "The euclidian distance between two data points is",
    np.linalg.norm(pointA - pointB),
)
3)
import pandas as pd
import scipy.stats as s

# The dict literal was split in the middle of the 'Marks' key by line
# wrapping, which is a syntax error; it is rejoined here.
d = {
    'Name': pd.Series(['Ram', 'Shyam', 'Seeta', 'Geeta', 'Meena', 'Reena']),
    'Marks': pd.Series([80.9, 91.2, 37, 84, 89.96, 55]),
}
df = pd.DataFrame(d)
print(df)
print("Mean of the data is", s.tmean(df["Marks"]))
# Range = largest mark minus smallest mark.
r = max(df["Marks"]) - min(df["Marks"])
print("The range of the data is", r)
# Inter-quartile range = 75th percentile minus 25th percentile.
q3, q1 = np.percentile(df["Marks"], [75, 25])
iqrvalue = q3 - q1
print("The IQR for the data is", iqrvalue)
4)
import numpy as np
from scipy.spatial import distance

# Operands are the overall maximum and minimum of `flat`.
a, b = flat.max(), flat.min()
# City-block (Manhattan) distance between the two values.
d = distance.cityblock(a, b)
print(d)
6)
# Build the frame in one call so Graduation_percentage and Age are integer
# columns; an empty DataFrame(columns=...) grown with df.loc[i] starts from
# object-dtype columns.
df = pd.DataFrame(
    [
        ['MJP', 85, 19],
        ['MYG', 84, 20],
        ['JJK', 90, 18],
        ['KTH', 84, 21],
        ['PJM', 78, 19],
        ['KNJ', 99, 22],
        ['KSJ', 67, 23],
        ['JHS', 79, 24],
    ],
    columns=['Name', 'Graduation_percentage', 'Age'],
)
print("Average age of students is", s.tmean(df["Age"]))
# The column name was split across two lines inside the string literal, which
# is a syntax error; it is rejoined here.
print(
    "Average graduation percentage is",
    s.tmean(df["Graduation_percentage"]),
)
df.describe()
SET B:
1)
from google.colab import files
# Result of the Colab upload call; indexed below by file name to get the
# uploaded file's bytes.
iris_uploaded = files.upload()
import pandas as pd
import io
# Parse the uploaded 'Iris.csv' bytes as CSV.
df=pd.read_csv(io.BytesIO(iris_uploaded['Iris.csv']))
df
# Draw 20 random rows. The sample gets its own name: this file also uses `s`
# as the alias for scipy.stats (s.tmean / s.hmean / s.gmean), and rebinding
# `s` to a DataFrame here would break those later calls.
sample = df.sample(20)
print(sample)
print("Maximum of sepal length", max(sample["SepalLengthCm"]))
print("Maximum of sepal width", max(sample["SepalWidthCm"]))
print("Maximum of petal length", max(sample["PetalLengthCm"]))
print("Maximum of petal width", max(sample["PetalWidthCm"]))
print("Minimum of sepal length", min(sample["SepalLengthCm"]))
print("Minimum of sepal width", min(sample["SepalWidthCm"]))
print("Minimum of petal length", min(sample["PetalLengthCm"]))
print("Minimum of petal width", min(sample["PetalWidthCm"]))
2)
# Number of distinct values per column, counted from each column's own
# unique() result.
n1 = len(df['SepalLengthCm'].unique())
n2 = len(df['SepalWidthCm'].unique())
n3 = len(df['PetalLengthCm'].unique())
n4 = len(df['PetalWidthCm'].unique())
n5 = len(df['Species'].unique())
print("The distinct values in Sepal length are", n1)
print("The distinct values in Sepal width are", n2)
print("The distinct values in Petal length are", n3)
print("The distinct values in Petal width are", n4)
print("The distinct values in Species are", n5)
3)
import pandas as pd
import numpy as np
# Arithmetic mean of each of the four measurement columns.
print("The mean of sepal length is",df['SepalLengthCm'].mean())
print("The mean of sepal width is",df['SepalWidthCm'].mean())
print("The mean of petal length is",df['PetalLengthCm'].mean())
print("The mean of petal width is",df['PetalWidthCm'].mean())
# Median of each of the four measurement columns.
print("The median of sepal length is",df['SepalLengthCm'].median())
print("The median of sepal width is",df['SepalWidthCm'].median())
print("The median of petal length is",df['PetalLengthCm'].median())
print("The median of petal width is",df['PetalWidthCm'].median())
SET C:
1)
from scipy.spatial import distance

# Two 3-component points; minkowski() is called with its default order.
a, b = [0, 5, 0], [0, 0, 12]
d = distance.minkowski(a, b)
print(d)
2)
import numpy as np

# 3x3 array holding 0..8 row by row (same values as
# np.arange(9).reshape((3, 3))).
a = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
print("Original flattened array:")
print(a)
# This message was split across two lines inside the string literal, which is
# a syntax error; it is rejoined here.
print("Weighted average along the specified axis of the above flattened array:")
# One weighted average per row (axis=1). np.average divides by the sum of the
# weights, so they do not need to add up to 1.
weighted_avg = np.average(a, axis=1, weights=[1./4, 2./4, 2./4])
print(weighted_avg)
3)
import numpy as np

x = np.array([0, 1, 3])
y = np.array([2, 4, 5])
print("\nOriginal array1:")
print(x)
# The second array was also labelled "array1" (copy-paste slip).
print("\nOriginal array2:")
print(y)
# np.cov returns the 2x2 covariance matrix of x and y, so the output is
# labelled as covariance rather than cross-correlation.
# NOTE(review): if a true cross-correlation is required, np.correlate(x, y)
# is the NumPy routine for it. Confirm against the exercise statement.
print("\nCovariance matrix of the said arrays:\n", np.cov(x, y))
4)
import pandas as pd
import io
# Reads 'heart.csv' from the iris_uploaded mapping, so a file with that name
# must be among the uploaded files.
df=pd.read_csv(io.BytesIO(iris_uploaded['heart.csv']))
df
# Descriptive statistics, then a cell-by-cell mask of missing values.
df.describe()
df.isnull()
6)
import pandas as pd
# Import scipy.stats under the alias used below so this snippet does not
# depend on whatever `s` was last bound to earlier in the session.
import scipy.stats as s

# Ten scores per subject.
score = {
    'English': [34, 44, 50, 43, 45, 23, 33, 43, 20, 32],
    'Statistics': [44, 34, 20, 23, 35, 43, 13, 44, 50, 32],
    'Science': [50, 24, 40, 29, 35, 43, 39, 23, 30, 32],
    'Maths': [34, 44, 50, 43, 45, 23, 33, 43, 20, 32],
    'Art': [44, 49, 50, 42, 35, 43, 39, 33, 40, 42],
}
dframe = pd.DataFrame(score)
print(dframe)

# One pass per kind of mean, each over the subjects in column order. This
# replaces fifteen copy-pasted print calls and prints in the same order.
mean_functions = [
    ("Arithmetic", s.tmean),
    ("Harmonic", s.hmean),
    ("Geometric", s.gmean),
]
for mean_label, mean_function in mean_functions:
    for subject in dframe.columns:
        print(f"{mean_label} Mean of {subject}", mean_function(dframe[subject]))
Assignment 3:
SET A:
1)
import pandas as pd
import io
# Reads 'Data.csv' from the iris_uploaded mapping, so a file with that name
# must be among the uploaded files.
dataset=pd.read_csv(io.BytesIO(iris_uploaded['Data.csv']))
dataset
# a) descriptive statistics, b) dimensions, c) first three rows.
a) dataset.describe()
b) dataset.shape
c) dataset.head(3)
2)
import pandas as pd
import numpy as np

# Replace missing Age / Salary values with the column mean. The filled column
# is assigned back explicitly: calling fillna(inplace=True) on the result of
# dataset['Age'] is chained assignment, which pandas deprecates and which does
# not update `dataset` when Copy-on-Write is enabled.
mean = dataset['Age'].mean()
dataset['Age'] = dataset['Age'].fillna(mean)
mean1 = dataset['Salary'].mean()
dataset['Salary'] = dataset['Salary'].fillna(mean1)
print(dataset)
3)
a)
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the Country column; categories not seen during fit are
# ignored at transform time.
enc = OneHotEncoder(handle_unknown='ignore')
encoded = enc.fit_transform(dataset[['Country']])
# Name the indicator columns after the encoder's output features and reuse
# dataset's index. Without this the columns are the integers 0, 1, 2, ..., and
# the joined frame ends up with a mix of str and int column labels.
enc_df = pd.DataFrame(
    encoded.toarray(),
    columns=enc.get_feature_names_out(['Country']),
    index=dataset.index,
)
print(enc_df)
df = dataset.join(enc_df)
print(df)
b)
from sklearn.preprocessing import LabelEncoder
labelencoder=LabelEncoder()
# Overwrite the Purchased column with the encoder's integer class labels.
dataset['Purchased']=labelencoder.fit_transform(dataset['Purchased'])
print(dataset)
SET B:
1)
import pandas as pd
import numpy as np
from sklearn import preprocessing

print("Data Scaled Between 0 to 1")
# Scale only the numeric columns. `df` was built by joining `dataset` with
# its one-hot columns and still contains the text Country column, which
# MinMaxScaler cannot convert to float.
numeric_df = df.select_dtypes(include="number")
# scikit-learn rejects frames whose column labels mix str and int.
numeric_df.columns = numeric_df.columns.astype(str)
data_scalar = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaled = data_scalar.fit_transform(numeric_df)
print("\nMinMaxScaled Data")
print("--------------------------------------------")
print(data_scaled.round(2))
2)
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler, normalize
import scipy.stats as scs

# Work on the numeric columns only; `df` still contains text columns that
# cannot be standardized. Column labels are made uniformly str because
# scikit-learn rejects a mix of str and int labels.
numeric_df = df.select_dtypes(include="number")
numeric_df.columns = numeric_df.columns.astype(str)

# scipy.stats is imported here as `scs`; the previous code called `s.tmean`,
# which relies on a name this snippet never binds.
print("\nInitial Mean : ", scs.tmean(numeric_df.to_numpy()).round(2))
print("\nInitial Standard Deviation : ", round(numeric_df.std(), 2))
# StandardScaler (imported above) centres every column and scales it to unit
# variance, like preprocessing.scale, without needing the `preprocessing`
# module that this snippet does not import.
df_scaled = StandardScaler().fit_transform(numeric_df)
# Per-column mean and standard deviation of the scaled data; these bare
# expressions only show a value when evaluated interactively.
df_scaled.mean(axis=0)
df_scaled.std(axis=0)
# "\S" is an invalid escape sequence; the intended leading newline is "\n".
# The value printed is the scaled data itself, so it is labelled as such.
print("\nStandardized Data : ", df_scaled.round(2))
print("\nScaled Mean : ", scs.tmean(df_scaled).round(2))
print("Scaled Standard Deviation : ", round(df_scaled.std(), 2))
3)
#Normalization
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler, normalize
import scipy.stats as scs

# Normalize only the numeric columns; `df` still contains text columns.
numeric_df = df.select_dtypes(include="number")
numeric_df.columns = numeric_df.columns.astype(str)
# Call the `normalize` function imported above directly instead of going
# through `preprocessing`, which this snippet does not import. norm='l1'
# scales each row so that its absolute values sum to 1.
dn = normalize(numeric_df, norm='l1')
print("\nL1 Normalized Data")
print("-----------------------------------")
print(dn.round(2))
SET C:
#Set C
import pandas as pd
import numpy as np
# Load the CSV from the current working directory.
df=pd.read_csv("StudentsPerformance.csv")
df
Assignment 4:
SET A:
1)
import numpy as np
import random

# 50 random integers in [0, 50]. The for-loop body had lost its indentation
# (a syntax error); a comprehension builds the same kind of list.
x = [random.randint(0, 50) for _ in range(50)]
print(x)
print(type(x))
x.sort()
print(x)
# Element-wise product of the sorted values with 50 evenly spaced factors
# from 50 to 150.
y = x * np.linspace(50, 150, 50)

# Each chart gets its own figure and its own show() call. Previously the line,
# histogram, scatter and box plot were all drawn onto the same axes before a
# single show(), so they overlapped.

# Line chart.
plt.figure()
plt.plot(x, y)
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.title('the line graph')
plt.grid(True)
plt.show()

# Histogram of the generated values.
plt.figure()
plt.hist(x, color='orange', edgecolor='blue')
plt.grid(True)
plt.show()

# Scatter plot.
plt.figure()
plt.scatter(x, y, marker='*', color='green')
plt.show()

# Horizontal box plot.
# NOTE(review): this uses a separate hard-coded list rather than x. Confirm
# that is intended.
data = [11, 4, 7, 30, 40.5, 66, 78, 84.7, 120, 150]
plt.figure()
plt.boxplot(data, vert=False)
plt.show()
2)
# Add two values far above the 0-50 range the list was generated from, then
# draw a horizontal box plot of the extended list.
x.append(550)
x.append(700)
plt.boxplot(x,vert=False)
3)
# Subject names and the mark for each, in matching order.
subject=['math','datascience','hindi','marathi','english']
marks=[70,80,50,30,55]
# Bar chart of marks per subject.
plt.bar(subject,marks)
plt.show()
# Offset of each pie wedge from the centre, one entry per subject.
explode=[0.1,0.1,0.2,0.1,0.2]
# Pie chart of the same marks; autopct prints each share with one decimal.
plt.pie(marks,labels=subject,explode=explode,autopct='%1.1f%%')
plt.show()
4)
import pandas as pd
data = pd.read_csv('Iris.csv')
df = pd.DataFrame(data)
print(df)
import numpy as np
# Non-null count of every column per species; the 'Id' column's count is used
# below as the species frequency.
frequency = df.groupby(by='Species').agg('count')
print(frequency)
# Take the labels from the grouped result so they line up with the counts.
# groupby orders the groups by sorted key, whereas df['Species'].unique()
# follows first appearance, so the two only match when the file happens to be
# ordered that way.
name = frequency.index.to_numpy()
print(name)
plt.bar(name, frequency['Id'])
plt.show()
5)
# Offset of each pie wedge from the centre; three entries, one per label.
explode=[0.01,0.01,0.01]
# Pie chart of the per-species counts.
plt.pie(frequency['Id'],labels=name,explode=explode)
plt.show()
6)
# Histogram of the Species column.
plt.hist(df['Species'])
plt.show()
SET B:
1)
# Axis labels, title and grid for the first figure.
plt.xlabel('petal length')
plt.ylabel('petal width')
plt.title("petal length vs petal width")
plt.grid(True)
# Line plot of petal width against petal length, in row order.
plt.plot(df['PetalLengthCm'],df['PetalWidthCm'])
plt.show()
# NOTE(review): the same data is plotted a second time below, without labels,
# title or grid. This looks like an accidental duplicate; confirm.
plt.plot(df['PetalLengthCm'],df['PetalWidthCm'])
plt.show()
2)
# Axis labels, title and grid for the scatter plot.
plt.xlabel('Petal length')
plt.ylabel('Sepal length')
plt.title('Petal length vs sepal length')
plt.grid(True)
# Petal length on the x-axis, sepal length on the y-axis.
plt.scatter(df['PetalLengthCm'],df['SepalLengthCm'])
plt.show()
3)
# Horizontal box plot of petal length over all rows.
# NOTE(review): the caption says "across species", but the plot is not grouped
# by Species. Confirm whether a per-species box plot is required.
print("distribution of petal length across species of iris")
plt.boxplot(df['PetalLengthCm'], vert=False)
plt.show()
# The second plot shows SepalWidthCm, so its caption must say sepal width; it
# previously repeated the petal-length caption (copy-paste slip).
print("distribution of sepal width across species of iris")
plt.boxplot(df['SepalWidthCm'], vert=False)
plt.show()

DS Programs

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

DS Programs

Uploaded by

Copyright:

Available Formats

Assignment 1:

8) import matplotlib.pyplot as plt

You might also like