
ANOVA

January 25, 2021

[1]: import pandas as pd


import numpy as np
import math
from scipy import stats
import scipy
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt

Import the data from an Excel file on the PC

[3]: naz=pd.read_excel(r'C:\Users\Nazakat ali\Desktop\python\naz.xlsx')


naz

[3]: stat math eco


0 50 40 36
1 47 30 32
2 37 12 24
3 24 50 47
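If the Excel file is not available, the same small data set can be rebuilt directly from the
values shown above; a minimal sketch:

# Sketch: reconstruct the marks table in code instead of reading naz.xlsx
naz = pd.DataFrame({'stat': [50, 47, 37, 24],
                    'math': [40, 30, 12, 50],
                    'eco':  [36, 32, 24, 47]})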

1 Create data in one-way ANOVA format


[4]: Anova=pd.melt(naz.reset_index(), id_vars=['index'], value_vars=['stat','math','eco'])

Anova.columns=['No','Subjects','Marks']
Anova

[4]: No Subjects Marks


0 0 stat 50
1 1 stat 47
2 2 stat 37
3 3 stat 24
4 0 math 40
5 1 math 30
6 2 math 12
7 3 math 50
8 0 eco 36
9 1 eco 32

10 2 eco 24
11 3 eco 47

2 Create a summary of the data set


[27]: import researchpy as rp

[29]: rp.summary_cont(Anova['Marks'].groupby(Anova['Subjects']))

[29]: N Mean SD SE 95% Conf. Interval


Subjects
eco 4 34.75 9.5699 4.7850 19.5221 49.9779
math 4 33.00 16.2070 8.1035 7.2111 58.7889
stat 4 39.50 11.7331 5.8666 20.8299 58.1701
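If researchpy is not installed, a plain pandas groupby gives a similar (if less polished)
summary; a sketch:

# Sketch: per-subject count, mean, standard deviation and standard error with pandas only
Anova.groupby('Subjects')['Marks'].agg(['count', 'mean', 'std', 'sem'])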

[35]: Anova.boxplot(column='Marks',by='Subjects', figsize=(10,8), color='r')

[35]: <AxesSubplot:title={'center':'Marks'}, xlabel='Subjects'>
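The cell above only displays the returned Axes object. Outside a notebook the figure needs an
explicit draw (or save) call; a sketch:

# Sketch: draw the boxplot of Marks by Subjects and render it on screen
ax = Anova.boxplot(column='Marks', by='Subjects', figsize=(10, 8), color='r')
ax.set_ylabel('Marks')
plt.show()   # or plt.savefig('marks_by_subject.png')  -- illustrative file name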

3 Fitting the ANOVA model to the data set ‘Anova’
[55]: Anova_fit=ols('Marks~C(Subjects)',data=Anova).fit()
Anova_fit

[55]: <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x20174440a90>

[56]: Anova_table=sm.stats.anova_lm(Anova_fit, typ=1)


Anova_table

[56]: df sum_sq mean_sq F PR(>F)


C(Subjects) 2.0 90.50 45.250000 0.275961 0.765037
Residual 9.0 1475.75 163.972222 NaN NaN

4 Find p-value
[44]: 1-scipy.stats.f.cdf(0.2759,2,9)

[44]: 0.7650816760156358
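Equivalently, and numerically safer for very small p-values, the F distribution's survival
function can be used; a sketch:

# Sketch: survival function sf(x) = 1 - cdf(x); same p-value as PR(>F) in the ANOVA table
scipy.stats.f.sf(0.2759, 2, 9)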

[98]: from statsmodels.stats.anova import anova_lm

[61]: Anova_fit.summary()

[61]: <class 'statsmodels.iolib.summary.Summary'>


"""
OLS Regression Results
==============================================================================
Dep. Variable: Marks R-squared: 0.058
Model: OLS Adj. R-squared: -0.152
Method: Least Squares F-statistic: 0.2760
Date: Sun, 24 Jan 2021 Prob (F-statistic): 0.765
Time: 23:06:37 Log-Likelihood: -45.899
No. Observations: 12 AIC: 97.80
Df Residuals: 9 BIC: 99.25
Df Model: 2
Covariance Type: nonrobust
=======================================================================================
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept              34.7500      6.403      5.427      0.000      20.266      49.234
C(Subjects)[T.math]    -1.7500      9.055     -0.193      0.851     -22.233      18.733
C(Subjects)[T.stat]     4.7500      9.055      0.525      0.613     -15.733      25.233
==============================================================================
Omnibus: 0.576 Durbin-Watson: 2.378
Prob(Omnibus): 0.750 Jarque-Bera (JB): 0.600
Skew: -0.343 Prob(JB): 0.741
Kurtosis: 2.146 Cond. No. 3.73
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
"""

5 Multiple comparison test (Tukey's HSD)
[46]: from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison

[47]: # create a MultiComparison object for the pairwise (Tukey HSD) comparisons


mc=MultiComparison(Anova['Marks'], Anova['Subjects'])
mc

[47]: <statsmodels.sandbox.stats.multicomp.MultiComparison at 0x2017439bf70>

[52]: mc_fit=mc.tukeyhsd(0.05)
mc_fit.summary()

[52]: <class 'statsmodels.iolib.table.SimpleTable'>
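Note that mc.tukeyhsd() performs Tukey's HSD test rather than Fisher's LSD. The SimpleTable
repr above hides the actual results; printing the summary shows the pairwise mean
differences, adjusted p-values and reject decisions; a sketch:

# Sketch: display the pairwise comparison table returned by tukeyhsd()
print(mc_fit.summary())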

6 RCB (randomized complete block) design
[53]: Anova1=pd.melt(naz.reset_index(), id_vars=['index'], value_vars=['stat','math','eco'])

Anova1.columns=['Blocks','Subjects','Marks']
Anova1

[53]: Blocks Subjects Marks


0 0 stat 50
1 1 stat 47
2 2 stat 37
3 3 stat 24
4 0 math 40
5 1 math 30
6 2 math 12
7 3 math 50
8 0 eco 36
9 1 eco 32
10 2 eco 24
11 3 eco 47

[54]: Anova1_fit1=ols('Marks~C(Blocks)+C(Subjects)',data=Anova1).fit()
Anova1_fit1
Anova1_table=sm.stats.anova_lm(Anova1_fit1, typ=2)
Anova1_table

[54]: sum_sq df F PR(>F)


C(Blocks) 572.25 3.0 1.266740 0.367077
C(Subjects) 90.50 2.0 0.300498 0.750975
Residual 903.50 6.0 NaN NaN
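One way to read this table is to compare each F statistic with its critical value at
alpha = 0.05 (numerator df from the table, 6 residual df); a sketch:

# Sketch: critical F values for the Blocks and Subjects tests
print(scipy.stats.f.ppf(0.95, 3, 6))   # critical F for Blocks   (3, 6 df)
print(scipy.stats.f.ppf(0.95, 2, 6))   # critical F for Subjects (2, 6 df)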

[62]: Anova1_fit1.summary()

[62]: <class 'statsmodels.iolib.summary.Summary'>


"""
OLS Regression Results
==============================================================================
Dep. Variable: Marks R-squared: 0.423
Model: OLS Adj. R-squared: -0.058
Method: Least Squares F-statistic: 0.8802
Date: Sun, 24 Jan 2021 Prob (F-statistic): 0.546
Time: 23:07:13 Log-Likelihood: -42.955
No. Observations: 12 AIC: 97.91
Df Residuals: 6 BIC: 100.8
Df Model: 5
Covariance Type: nonrobust
=======================================================================================
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept              41.0000      8.677      4.725      0.003      19.768      62.232
C(Blocks)[T.1]         -5.6667     10.019     -0.566      0.592     -30.183      18.850
C(Blocks)[T.2]        -17.6667     10.019     -1.763      0.128     -42.183       6.850
C(Blocks)[T.3]         -1.6667     10.019     -0.166      0.873     -26.183      22.850
C(Subjects)[T.math]    -1.7500      8.677     -0.202      0.847     -22.982      19.482
C(Subjects)[T.stat]     4.7500      8.677      0.547      0.604     -16.482      25.982
==============================================================================
Omnibus: 2.503 Durbin-Watson: 2.431
Prob(Omnibus): 0.286 Jarque-Bera (JB): 1.061
Skew: -0.728 Prob(JB): 0.588
Kurtosis: 3.060 Cond. No. 5.35
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
"""

7 Two-way ANOVA with interaction
[89]: df45 = pd.DataFrame({'prepration': np.repeat(['3-hr', 'oneday', '10 week'], 6),
                           'College': np.tile(np.repeat(['Business', 'Engineering', 'Art and sci'], 2), 3),
                           'score': [500, 580, 540, 460, 480, 400, 460, 540, 560, 620,
                                     420, 480, 560, 600, 600, 580, 480, 410]})

[90]: df45

[90]: prepration College score


0 3-hr Business 500
1 3-hr Business 580
2 3-hr Engineering 540
3 3-hr Engineering 460
4 3-hr Art and sci 480
5 3-hr Art and sci 400
6 oneday Business 460
7 oneday Business 540
8 oneday Engineering 560
9 oneday Engineering 620
10 oneday Art and sci 420
11 oneday Art and sci 480
12 10 week Business 560
13 10 week Business 600
14 10 week Engineering 600
15 10 week Engineering 580
16 10 week Art and sci 480
17 10 week Art and sci 410

[92]: fit_model=ols('score~C(College)+C(prepration)+C(College):C(prepration)', data=df45).fit()

fit_model
fit_anova=anova_lm(fit_model, typ=2)
fit_anova

[92]: sum_sq df F PR(>F)


C(College) 45300.0 2.0 10.269521 0.004757
C(prepration) 6100.0 2.0 1.382872 0.299436
C(College):C(prepration) 11200.0 4.0 1.269521 0.350328
Residual 19850.0 9.0 NaN NaN
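Since College is significant while the interaction is not, an interaction plot is a useful
visual check; a sketch using statsmodels' plotting helper:

# Sketch: mean score for each College across the preparation levels
from statsmodels.graphics.factorplots import interaction_plot

fig = interaction_plot(x=df45['prepration'], trace=df45['College'],
                       response=df45['score'])
plt.show()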

[99]: fit_model.summary()

[99]: <class 'statsmodels.iolib.summary.Summary'>


"""
OLS Regression Results
==============================================================================
Dep. Variable: score R-squared: 0.759
Model: OLS Adj. R-squared: 0.545

Method: Least Squares F-statistic: 3.548
Date: Mon, 25 Jan 2021 Prob (F-statistic): 0.0384
Time: 00:04:47 Log-Likelihood: -88.591
No. Observations: 18 AIC: 195.2
Df Residuals: 9 BIC: 203.2
Df Model: 8
Covariance Type: nonrobust
=====================================================================================================================
                                                        coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------------------------
Intercept                                           445.0000     33.208     13.400      0.000     369.878     520.122
C(College)[T.Business]                              135.0000     46.963      2.875      0.018      28.762     241.238
C(College)[T.Engineering]                           145.0000     46.963      3.088      0.013      38.762     251.238
C(prepration)[T.3-hr]                                -5.0000     46.963     -0.106      0.918    -111.238     101.238
C(prepration)[T.oneday]                               5.0000     46.963      0.106      0.918    -101.238     111.238
C(College)[T.Business]:C(prepration)[T.3-hr]        -35.0000     66.416     -0.527      0.611    -185.244     115.244
C(College)[T.Engineering]:C(prepration)[T.3-hr]     -85.0000     66.416     -1.280      0.233    -235.244      65.244
C(College)[T.Business]:C(prepration)[T.oneday]      -85.0000     66.416     -1.280      0.233    -235.244      65.244
C(College)[T.Engineering]:C(prepration)[T.oneday]    -5.0000     66.416     -0.075      0.942    -155.244     145.244
==============================================================================
Omnibus: 16.430 Durbin-Watson: 2.984
Prob(Omnibus): 0.000 Jarque-Bera (JB): 2.333
Skew: 0.000 Prob(JB): 0.311
Kurtosis: 1.236 Cond. No. 13.9
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
"""
