Fdsa Final

EXP. NO: 01 WORKING WITH PANDAS DATA FRAME
SOURCE CODE:
# Experiment 1: load an uploaded CSV into a pandas DataFrame and inspect it.
# Written for a Google Colab notebook; a bare expression is only displayed
# when it is the last statement of a notebook cell.
import io

import numpy as np
import pandas as pd
from google.colab import files

# Opens the Colab file picker; the result maps each file name to its bytes.
uploaded = files.upload()

# read_csv needs a path or buffer, so wrap the uploaded bytes of the first file.
# NOTE(review): assumes exactly one CSV file was uploaded -- confirm.
filename = next(iter(uploaded))
d = pd.read_csv(io.BytesIO(uploaded[filename]))

d.describe()
d.head()
d.head(6)
d.tail()
# dtypes, ndim, shape, size and columns are attributes, not methods.
d.dtypes
d.ndim
d.shape
d.size
d.columns
d["Age"]
a = d.sort_values(by="Age")
a1 = d.sort_values(by="Age", ascending=True)
a1
# inplace=True makes sort_values return None, so keep the returned copy instead.
a2 = d.sort_values(by="Age", ascending=True)
a2
# rows whose Salary is strictly greater than 20000
b = d[d["Salary"] > 20000]

b
1
OUTPUT:
Out[1]:
Sl.No Employee Age Sex Salary
s
S
e
r
v
i
c
e
Teacher 30. female 2.0 20000.

0 0
Officestaff 20. male 5.0 15000.

0 0
Teacher 42. male NaN 35000.

0 0

0 0
Officestaff 30. female 6.0 15000.

0 0
Officestaff 31. female 2.0 NaN

0
Teacher Na male 5.0 25000.

N 0

0 0
Officestaff 38. male 6.0 15000.

0 0
10 Teacher 44. male NaN 35000.

0 0
10 11 Teacher 28. female 4.0 20000.

0 0
11 12 Officestaff 33. female 3.0 NaN

0
12 13 Officestaff 30. male 8.0 20000.

0 0
13 14 Teacher 41. male 1.0 10000.

0 0
14 15 Officestaff 31. female 2
6.0 25000.
0 0
3
Out[2]:
SlNo Age Service Salary
count 21.000000 20.00000 18.000000 19.000000

0
mean 11.000000 34.60000 4.222222 21842.1052

0 63
std 6.204837 7.949843 2.073802 7632.26453

9
min 1.000000 20.00000 1.000000 10000.0000

0 00
25% 6.000000 30.00000 2.000000 15000.0000

0 00
50% 11.000000 32.00000 4.500000 20000.0000

0 00
75% 16.000000 42.00000 6.000000 25000.0000

0 00
max 21.000000 45.00000 8.000000 35000.0000

0 00
Out[3]:
Age Sex Services Salary

SlNo
e
Servic
Teacher 30.0 femal 2.0 20000.0
e
Officestaff 20.0 male 5.0 15000.0
Teacher 42.0 male NaN 35000.0
Teacher 29.0 femal 4.0 25000.0
e
Officestaff 30.0 femal 6.0 15000.0
4
Out[4]:
SlN Employees Age Sex Service Salary

o
Teacher 30.0 femal 2.0 20000.

e 0
Officestaff 20.0 male 5.0 15000.

0
Teacher 42.0 male NaN 35000.

0
Teacher 29.0 femal 4.0 25000.

e 0
Officestaff 30.0 femal 6.0 15000.

e 0
Officestaff 31.0 femal 2.0 NaN
e
5
EX.NO:02 BASIC PLOTS USING MATPLOTLIB
SOURCE CODE:
1) PLOT(X,Y)
# Line plot of y = 4 + 2*sin(2x) sampled at 100 points on [0, 10].
import matplotlib.pyplot as plt

import numpy as np

plt.style.use('_mpl-gallery')

# make data
x = np.linspace(0, 10, 100)
y = 4 + 2 * np.sin(2 * x)

# plot
fig, ax = plt.subplots()
ax.plot(x, y, linewidth=2.0)
ax.set(xlim=(0, 8), xticks=np.arange(1, 8),
       ylim=(0, 8), yticks=np.arange(1, 8))
plt.show()
OUTPUT:
6
2) SCATTER(X,Y)

# Scatter plot of 24 normally distributed points with random sizes and colours.
import matplotlib.pyplot as plt
import numpy as np

# make the data
np.random.seed(3)
x = 4 + np.random.normal(0, 2, 24)
y = 4 + np.random.normal(0, 2, len(x))

# size and color:
sizes = np.random.uniform(15, 80, len(x))
colors = np.random.uniform(15, 80, len(x))

# plot
# Create the axes here so the snippet does not rely on an earlier figure.
fig, ax = plt.subplots()
ax.scatter(x, y, s=sizes, c=colors, vmin=0, vmax=100)
plt.show()
OUTPUT:
7
3) BAR(X,HEIGHT)
# Bar chart of 8 uniformly distributed heights centred on 0.5, 1.5, ..., 7.5.
import matplotlib.pyplot as plt
import numpy as np

# make data:
np.random.seed(3)
x = 0.5 + np.arange(8)
y = np.random.uniform(2, 7, len(x))

# plot
fig, ax = plt.subplots()
ax.bar(x, y, width=1, edgecolor="white", linewidth=0.7)
plt.show()
OUTPUT:
4) STEM(X,Y)
# Stem plot of 8 uniformly distributed values at x = 0.5, 1.5, ..., 7.5.
import matplotlib.pyplot as plt
import numpy as np

# make data
np.random.seed(3)
x = 0.5 + np.arange(8)
y = np.random.uniform(2, 7, len(x))

# plot
# Create the axes here so the snippet does not rely on an earlier figure.
fig, ax = plt.subplots()
ax.stem(x, y)
ax.set(xlim=(0, 8), xticks=np.arange(1, 8),
       ylim=(0, 8), yticks=np.arange(1, 8))
plt.show()
OUTPUT:
9
EX.NO:3 FREQUENCY DISTRIBUTION, AVERAGES ,VARIABILITY
PROGRAM:
A histogram is a plot of the frequency distribution of a numeric array, obtained by splitting its range into small, equal-sized bins.
1) FREQUENCY DISTRIBUTION
# Text histogram: count 100 random integers from [0, 100) in 10 equal bins.
import numpy as np

x = np.random.randint(low=0, high=100, size=100)

# Compute frequency and bins
frequency, bins = np.histogram(x, bins=10, range=[0, 100])

# Pretty print: one row per bin, labelled with the bin's upper edge,
# followed by one '*' per value counted in that bin.
for b, f in zip(bins[1:], frequency):
    print(round(b, 1), '*' * int(f))
OUTPUT:
10.0* ** * ** *
20.0* * ** * ** ***
30.0* * ** * ** *** * **
40.0* ** * ** * *
50.0* * ** * ** **
60.0* * ** * ** **
70.0* * ** * ** *** * *
80.0* ** * ** *
90.0* * ** * ** *** * ** * * **
100.0* ** * ** * *
2) AVERAGE, VARIANCE, STANDARD DEVIATION
# Mean, standard deviation and variance of the integers 0..9, computed by hand
# from the mean of squared deviations (divisor n, not n - 1).
import numpy as np

# Original array
array = np.arange(10)
print(array)

r1 = np.average(array)
print("\nMean:", r1)

# standard deviation = square root of the mean squared deviation
r2 = np.sqrt(np.mean((array - np.mean(array)) ** 2))
print("\nstd:", r2)

# variance = mean squared deviation
r3 = np.mean((array - np.mean(array)) ** 2)
print("\nvariance:", r3)
OUTPUT:
[0 1 2 3 4 5 6 7 8 9]
Mean: 4.5
std: 2.8722813232690143
variance: 8.25
12
EX.NO:4 NORMAL CURVES, CORRELATION AND SCATTER
PLOTS, CORRELATION COEFFICIENT
PROGRAM:
1) NORMAL CURVE
# Plot the standard normal curve and shade the region between two
# specification limits (x1, x2) after converting them to z-scores.
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

# if using a Jupyter notebook, include:
# %matplotlib inline

# define constants
mu = 998.8
sigma = 73.10
x1 = 900
x2 = 1100

# calculate the z-transform
z1 = (x1 - mu) / sigma
z2 = (x2 - mu) / sigma

# range of x in spec
x = np.arange(z1, z2, 0.001)
# entire range of x, both in and out of spec
x_all = np.arange(-10, 10, 0.001)

# mean=0, stddev=1, since Z-transform was calculated
y = norm.pdf(x, 0, 1)
y2 = norm.pdf(x_all, 0, 1)

# build the plot
fig, ax = plt.subplots(figsize=(9, 6))
# The style sheet name is a single word; 'five thirty eight' is not a valid style.
plt.style.use('fivethirtyeight')

ax.plot(x_all, y2)
ax.fill_between(x, y, 0, alpha=0.3, color='b')
ax.fill_between(x_all, y2, 0, alpha=0.1)
ax.set_xlim([-4, 4])
ax.set_xlabel('# of Standard Deviations Outside the Mean')
ax.set_yticklabels([])
ax.set_title('Normal Curve')
plt.savefig('normal_curve.png', dpi=72, bbox_inches='tight')
plt.show()
OUTPUT:
2) SCATTERPLOT
# Simple scatter plot: y is x plus a random integer offset from [0, 30).
import matplotlib.pyplot as plt
import numpy as np

x = range(50)
y = np.arange(50) + np.random.randint(0, 30, 50)
plt.scatter(x, y)
# The quotes around the rcParams keys and the label strings were lost in the
# original listing; they are restored here.
plt.rcParams.update({'figure.figsize': (10, 8), 'figure.dpi': 100})
plt.title('simple scatter plot')
plt.xlabel('x value')
plt.ylabel('y value')
plt.show()
OUTPUT:
16
3) CORRELATION WITH SCATTER PLOT
# Scatter three series against x and show each Pearson correlation
# coefficient (rounded to 2 decimals) in the legend.
import matplotlib.pyplot as plt
import numpy as np

x = np.random.randn(100)
y1 = x * 5 + 9                # exact increasing linear function of x
y2 = -5 + x                   # x shifted by a constant
y3 = np.random.randn(100)     # drawn independently of x

plt.rcParams.update({'figure.figsize': (10, 8), 'figure.dpi': 100})
# np.corrcoef returns the 2x2 correlation matrix; [0, 1] is corr(x, y).
plt.scatter(x, y1, label=f'y1,correlation={np.round(np.corrcoef(x, y1)[0, 1], 2)}')
plt.scatter(x, y2, label=f'y2,correlation={np.round(np.corrcoef(x, y2)[0, 1], 2)}')
plt.scatter(x, y3, label=f'y3,correlation={np.round(np.corrcoef(x, y3)[0, 1], 2)}')
plt.title('scatterplotandcorrelations')
plt.legend()
plt.show()
17
OUTPUT:
18
EX.NO:5 REGRESSION
SOURCE CODE:
# Fit an ordinary-least-squares regression with the statsmodels formula API
# and draw a seaborn regression plot for one predictor.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf

# Create a sample DataFrame.
# In this sample independent_var2 equals independent_var1 + 2 and
# independent_var3 equals 2 * independent_var1.
df = pd.DataFrame({'independent_var1': [1, 2, 3, 4, 5],
                   'independent_var2': [3, 4, 5, 6, 7],
                   'independent_var3': [2, 4, 6, 8, 10],
                   'dependent_var': [5, 7, 9, 11, 13]})

# Fit the regression model
model = smf.ols(
    formula='dependent_var ~ independent_var1 + independent_var2 + independent_var3',
    data=df).fit()

# Print the model summary
print(model.summary())

# Visualize the regression line
sns.lmplot(x='independent_var1', y='dependent_var', data=df, ci=None)
plt.show()
OUTPUT:
OUT[1]:
OLS Regression Results
==============================================================================
Dep. Variable: Model:dependent_var
Method: Date: OLS R-squared:1.000
Time: Least Squares Thu, 30Adj.
MarR-squared:1.000
2023
No. Observations: Df Residuals:
08:40:16 F-statistic:7.606e+30 Prob (F-statistic):1.05e-46 Log-Likelihood:16
Df Model: -321.5
5 AIC: BIC:
3 -322.2
1
Covariance Type:nonrobust
19
====================================================================================
coefstd errtP>|t|[0.0250.975]
independent_var1 independent_var2
0.15383.17e-164.86e+14 0.000 0.1540.154
1.2308 9e-16 1.37e+15 0.0001.2311.231
independent_var30.3077 6.34e-164.86e+140.0000.3080.308
==============================================================================
Omnibus:nanDurbin-Watson:0.400
Prob(Omnibus):nanJarque-Bera (JB):0.770
Skew:-0.844Prob(JB):0.680
Kurtosis:2.078Cond. No4.43e+16
==============================================================================
OUT[2]:
20
EXNO: 6 IMPLEMENT Z-TEST USING STATSMODELS
SOURCE CODE:
1) ONE SAMPLE Z-TEST
# One-sample z-test on a list of 20 IQ scores using statsmodels.
from statsmodels.stats.weightstats import ztest

# IQ levels for 20 patients
data = [
    88, 92, 94, 94, 96, 97, 97, 97, 99, 99,
    105, 109, 109, 109, 110, 112, 112, 113, 114, 115,
]

# run the one-sample z-test with value=100
ztest(data, value=100)
2) TWO SAMPLE Z-TEST
# Two-sample z-test comparing 20 IQ scores from each of two cities.
from statsmodels.stats.weightstats import ztest as ztest

# enter IQ levels for 20 individuals from each city
cityA = [82, 84, 85, 89, 91, 91, 92, 94, 99, 99,
         105, 109, 109, 109, 110, 112, 112, 113, 114, 114]
cityB = [90, 91, 91, 91, 95, 95, 99, 99, 108, 109,
         109, 114, 115, 116, 117, 117, 128, 129, 130, 133]

# perform two sample z-test with value=0
ztest(cityA, cityB, value=0)
21
OUTPUT:
ONE SAMPLE Z-TEST
(1.5976240527147705,0.1101266701438426)
TWO SAMPLE Z-TEST

(-1.9953236073282115,0.046007596761332065)
22
EXNO:7 IMPLEMENTATION OF T-TEST USING SCIPY
SOURCE CODE:
1) ONE SAMPLE T-TEST
# One-sample t-test of 12 measurements against a population mean of 15.
from scipy import stats

data = [
    14, 14, 16, 13, 12, 17,
    15, 14, 15, 13, 15, 14,
]

# run the one-sample t-test
stats.ttest_1samp(a=data, popmean=15)
2) TWO SAMPLE T-TEST
# Two-sample t-test (equal variances) on two groups of 20 measurements.
import numpy as np
from scipy import stats

group1 = np.array([
    14, 15, 15, 16, 13, 8, 14, 17, 16, 14,
    19, 20, 21, 15, 15, 16, 16, 13, 14, 12,
])
group2 = np.array([
    15, 17, 14, 17, 14, 8, 12, 19, 19, 14,
    17, 22, 24, 16, 13, 16, 13, 18, 15, 13,
])

# variance of each group, printed side by side
print(group1.var(), group2.var())

# independent two-sample t-test, passing equal_var=True
stats.ttest_ind(a=group1, b=group2, equal_var=True)
23
OUTPUT:
ONE SAMPLE T-TEST
TtestResult(statistic=-1.6848470783484626,pvalue=0.12014460742498101,df=11)
TWO SAMPLE T-TEST
7.73,12.26
TtestResult(statistic=-0.6337,pvalue=0.53005)
24
EXNO:8 ANOVA TABLE
SOURCE CODE:
# One-way ANOVA across three groups of ten exam scores.
from scipy.stats import f_oneway

# exam scores, one list per group
group1 = [85, 86, 88, 75, 78, 94, 98, 79, 71, 80]
group2 = [91, 92, 93, 85, 87, 84, 82, 88, 95, 96]
group3 = [79, 78, 88, 94, 92, 85, 83, 85, 82, 81]

# run the one-way ANOVA on the three groups
f_oneway(group1, group2, group3)
# Two-way ANOVA of plant height on watering frequency, sun exposure and
# their interaction, using the statsmodels formula API.
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# create data: 2 water levels x 3 sun levels x 5 plants = 30 rows
df = pd.DataFrame({
    'water': np.repeat(['daily', 'weekly'], 15),
    'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
    'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5, 6, 6, 7, 8, 7,
               3, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 6, 6, 7, 8],
})

# view rows of data
df

# perform two-way ANOVA
model = ols('height ~ C(water) + C(sun) + C(water):C(sun)', data=df).fit()
sm.stats.anova_lm(model, typ=2)
25
OUTPUT:
OUT[1]:
F_onewayResult(statistic=2.3575322551335636, pvalue=0.11384795345837218)
OUT[2]:
water sun height
0 daily low 6
1 daily low 6
2 daily low 6
3 daily low 5
4 daily low 6
5 daily med 5
6 daily med 5
7 daily med 6
8 daily med 4
9 daily med 5
sum_sq df F PR(>F)
C(water) 8.533333 1.0 16.0000 0.000527
C(sun) 24.866667 2.0 23.3125 0.000002
C(water):C(sun) 2.466667 2.0 2.3125 0.120667
Residual 12.800000 24.0 NaN NaN
26
EXNO:9 BUILDING AND VALIDATING LINEAR MODELS
SOURCE CODE:
# Fit a scikit-learn linear regression on five points and report the mean
# squared error and R-squared of its predictions on those same points.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# create sample data
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 5, 4, 6])

# reshape data to column vectors for input to the linear regression model
x = x.reshape(-1, 1)
y = y.reshape(-1, 1)

# create linear regression model
model = LinearRegression()

# fit model to data
model.fit(x, y)

# make predictions on the same inputs the model was fitted on
# (there is no separate test set in this example)
y_pred = model.predict(x)

# calculate mean squared error and r-squared values
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)

# print results
print("Mean squared error: ", mse)
print("R-squared value: ", r2)
27
OUTPUT:
Mean squared error: 0.4799999999999998

R-squared value: 0.7272727272727274
28
EXNO:10 BUILDING AND VALIDATING LOGISTIC MODELS
SOURCE CODE:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
# Sample data
data = {'age': [25, 32, 28, 45, 36, 52, 40, 55, 60, 48],'income': [50000, 75000, 60000, 55000,
70000, 80000, 65000, 90000, 95000, 90000],'default': [0, 0, 0, 1, 1, 1, 0, 1, 1, 1]}
# Create a DataFrame
df = pd.DataFrame(data)
# Data preprocessing
X = df[['age', 'income']] # Features
Y = df['default'] # Target variable
# Split data into training and testing sets

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
# Initialize and train the logistic regression model

Model = LogisticRegression()
Model.fit(X_train, Y_train)
# Make predictions on the test set

Y_pred = Model.predict(X_test)
# Model evaluation
accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)
confusion = confusion_matrix(Y_test, Y_pred)
# Print the evaluation metrics

print("Accuracy: {:.2f}%".format(accuracy * 100))
print("Precision: {:.2f}%".format(precision * 100))
print("Recall: {:.2f}%".format(recall * 100))
print("F1-score: {:.2f}%".format(f1 * 100))
print("Confusion Matrix:\n", confusion)
29
OUTPUT:
● The evaluation metrics
Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%
F1-score: 100.00%
Confusion Matrix:
[[1 0]
[0 1]]
30
EXNO: 11 TIME SERIES ANALYSIS
PROGRAM:
SINGLE TIME SERIES
# Time-series line plots with seaborn: a single series, the same series with
# custom aesthetics, and two series distinguished by colour.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# --- single time series ---
# create DataFrame
df = pd.DataFrame({'date': ['1/2/2021', '1/3/2021', '1/4/2021', '1/5/2021',
                            '1/6/2021', '1/7/2021', '1/8/2021'],
                   'value': [4, 7, 8, 13, 17, 15, 21]})
sns.lineplot(x='date', y='value', data=df)

# To customise line color and line width:
# create time series plot with custom aesthetics
sns.lineplot(x='date', y='value', data=df, linewidth=3, color='purple',
             linestyle='dashed').set(title='TimeSeries Plot')
# rotate x-axis labels by 15 degrees
plt.xticks(rotation=15)

# --- multiple time series ---
# create DataFrame
df = pd.DataFrame({'date': ['1/1/2021', '1/2/2021', '1/3/2021', '1/4/2021',
                            '1/1/2021', '1/2/2021', '1/3/2021', '1/4/2021'],
                   'sales': [4, 7, 8, 13, 17, 15, 21, 28],
                   'company': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B']})
# plot multiple time series, one line per company
sns.lineplot(x='date', y='sales', hue='company', data=df)
32
OUTPUT:
OUT[1]:
<AxesSubplot:xlabel='date',ylabel='value'>
OUT[2]:
([0,1,2,3,4,5,6],
[Text(0,0,''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, '')])
33
OUT[3]:
<AxesSubplot:xlabel='date',ylabel='sales'>
34

Fdsa Final

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Fdsa Final

Uploaded by

Copyright:

Available Formats

EXP.

NO:01 WORKING WITH PANDAS DATA FRAME

a1=d.sort_values(by= Age ascending=True)

a2=d.sort_values(by= Age ,ascending==True,inplace=True)

b=d[d[ Salary ]>20000]

Teacher 30. female 2.0 20000.

Officestaff 20. male 5.0 15000.

Teacher 42. male NaN 35000.

Teacher 29. female 4.0 25000.

Officestaff 30. female 6.0 15000.

Officestaff 31. female 2.0 NaN

Teacher Na male 5.0 25000.

Teacher 45. female 2.0 20000.

Officestaff 38. male 6.0 15000.

10 Teacher 44. male NaN 35000.

10 11 Teacher 28. female 4.0 20000.

11 12 Officestaff 33. female 3.0 NaN

12 13 Officestaff 30. male 8.0 20000.

13 14 Teacher 41. male 1.0 10000.

SlNo Age Service Salary

count 21.000000 20.00000 18.000000 19.000000

mean 11.000000 34.60000 4.222222 21842.1052

std 6.204837 7.949843 2.073802 7632.26453

min 1.000000 20.00000 1.000000 10000.0000

25% 6.000000 30.00000 2.000000 15000.0000

50% 11.000000 32.00000 4.500000 20000.0000

75% 16.000000 42.00000 6.000000 25000.0000

max 21.000000 45.00000 8.000000 35000.0000

Age Sex Services Salary

Teacher 42.0 male NaN 35000.0

Teacher 29.0 femal 4.0 25000.0

SlN Employees Age Sex Service Salary

Teacher 30.0 femal 2.0 20000.

Officestaff 20.0 male 5.0 15000.

Teacher 42.0 male NaN 35000.

Teacher 29.0 femal 4.0 25000.

Officestaff 30.0 femal 6.0 15000.

import matplotlib.pyplot as plt

x = np.linspace(0, 10, 100)y=4+2 *np.sin(2*x)

import matplotlib.pyplot as plt

#make the data

#size and color:

import matplotlib.pyplot as plt

fig, ax=plt.subplots() ax.bar(x,y,width=1,edgecolor="white",linewidth=0.7)

#Compute frequency and bins

for b, finzip(bins[1:], frequency):print(round(b,1),''.join(np.repeat('*',f)))

r2=np.sqrt(np.mean((array -np.mean(array))**2)) print("\

import numpy asnp

import matplotlib.pyplot as plt

from scipy.stats import norm

#if using a Jupyter notebook,

#calculate the z-transform

x = np.arange(z1, z2, 0.001)

x_all=np.arange(-10, 10, 0.001)

# entire range of x, both in and out of spec

#mean=0,stddev=1,since Z-transform was calculated

y2 = norm.pdf(x_all,0,1)#build the plot

plt.style.use('five thirty eight')

ax.set_xlabel('# of Standard Deviations Outside the Mean')

plt.savefig('normal_curve.png', dpi=72, bbox_inches='tight')

import matplotlib.pyplot as plt

import seaborn as sns

x = np.linspace(0, 10, 100)y=4+2 np.sin(2x)