
ANOVA

January 25, 2021

[1]: import pandas as pd


import numpy as np
import math
from scipy import stats
import scipy
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt

Import the data from an Excel file on the PC

[3]: naz=pd.read_excel(r'C:\Users\Nazakat ali\Desktop\python\naz.xlsx')


naz

[3]: stat math eco


0 50 40 36
1 47 30 32
2 37 12 24
3 24 50 47
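If the Excel file is not available, the same small data set can be rebuilt directly from the
values shown above; a minimal sketch:

# Sketch: reconstruct the marks table in code instead of reading naz.xlsx
naz = pd.DataFrame({'stat': [50, 47, 37, 24],
                    'math': [40, 30, 12, 50],
                    'eco':  [36, 32, 24, 47]})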

1 Create data in one-way ANOVA format


[4]: Anova=pd.melt(naz.reset_index(), id_vars=['index'], value_vars=['stat','math','eco'])

Anova.columns=['No','Subjects','Marks']
Anova

[4]: No Subjects Marks


0 0 stat 50
1 1 stat 47
2 2 stat 37
3 3 stat 24
4 0 math 40
5 1 math 30
6 2 math 12
7 3 math 50
8 0 eco 36
9 1 eco 32

10 2 eco 24
11 3 eco 47

2 Create a summary of the data set


[27]: import researchpy as rp

[29]: rp.summary_cont(Anova['Marks'].groupby(Anova['Subjects']))

[29]: N Mean SD SE 95% Conf. Interval


Subjects
eco 4 34.75 9.5699 4.7850 19.5221 49.9779
math 4 33.00 16.2070 8.1035 7.2111 58.7889
stat 4 39.50 11.7331 5.8666 20.8299 58.1701
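If researchpy is not installed, a plain pandas groupby gives a similar (if less polished)
summary; a sketch:

# Sketch: per-subject count, mean, standard deviation and standard error with pandas only
Anova.groupby('Subjects')['Marks'].agg(['count', 'mean', 'std', 'sem'])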

[35]: Anova.boxplot(column='Marks',by='Subjects', figsize=(10,8), color='r')

[35]: <AxesSubplot:title={'center':'Marks'}, xlabel='Subjects'>
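The cell above only displays the returned Axes object. Outside a notebook the figure needs an
explicit draw (or save) call; a sketch:

# Sketch: draw the boxplot of Marks by Subjects and render it on screen
ax = Anova.boxplot(column='Marks', by='Subjects', figsize=(10, 8), color='r')
ax.set_ylabel('Marks')
plt.show()   # or plt.savefig('marks_by_subject.png')  -- illustrative file name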

3 Fitting the ANOVA model to the data set ‘Anova’
[55]: Anova_fit=ols('Marks~C(Subjects)',data=Anova).fit()
Anova_fit

[55]: <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x20174440a90>

[56]: Anova_table=sm.stats.anova_lm(Anova_fit, typ=1)


Anova_table

[56]: df sum_sq mean_sq F PR(>F)


C(Subjects) 2.0 90.50 45.250000 0.275961 0.765037
Residual 9.0 1475.75 163.972222 NaN NaN

4 Find p-value
[44]: 1-scipy.stats.f.cdf(0.2759,2,9)

[44]: 0.7650816760156358
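Equivalently, and numerically safer for very small p-values, the F distribution's survival
function can be used; a sketch:

# Sketch: survival function sf(x) = 1 - cdf(x); same p-value as PR(>F) in the ANOVA table
scipy.stats.f.sf(0.2759, 2, 9)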

[98]: from statsmodels.stats.anova import anova_lm

[61]: Anova_fit.summary()

[61]: <class 'statsmodels.iolib.summary.Summary'>


"""
OLS Regression Results
==============================================================================
Dep. Variable: Marks R-squared: 0.058
Model: OLS Adj. R-squared: -0.152
Method: Least Squares F-statistic: 0.2760
Date: Sun, 24 Jan 2021 Prob (F-statistic): 0.765
Time: 23:06:37 Log-Likelihood: -45.899
No. Observations: 12 AIC: 97.80
Df Residuals: 9 BIC: 99.25
Df Model: 2
Covariance Type: nonrobust
=======================================================================================
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept              34.7500      6.403      5.427      0.000      20.266      49.234
C(Subjects)[T.math]    -1.7500      9.055     -0.193      0.851     -22.233      18.733
C(Subjects)[T.stat]     4.7500      9.055      0.525      0.613     -15.733      25.233
==============================================================================
Omnibus: 0.576 Durbin-Watson: 2.378
Prob(Omnibus): 0.750 Jarque-Bera (JB): 0.600
Skew: -0.343 Prob(JB): 0.741
Kurtosis: 2.146 Cond. No. 3.73
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
"""

5 Multiple comparison test (Tukey's HSD)
[46]: from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison

[47]: # create a MultiComparison object for the pairwise (Tukey HSD) comparisons


mc=MultiComparison(Anova['Marks'], Anova['Subjects'])
mc

[47]: <statsmodels.sandbox.stats.multicomp.MultiComparison at 0x2017439bf70>

[52]: mc_fit=mc.tukeyhsd(0.05)
mc_fit.summary()

[52]: <class 'statsmodels.iolib.table.SimpleTable'>
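Note that mc.tukeyhsd() performs Tukey's HSD test rather than Fisher's LSD. The SimpleTable
repr above hides the actual results; printing the summary shows the pairwise mean
differences, adjusted p-values and reject decisions; a sketch:

# Sketch: display the pairwise comparison table returned by tukeyhsd()
print(mc_fit.summary())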

6 RCB (randomized complete block) design
[53]: Anova1=pd.melt(naz.reset_index(), id_vars=['index'], value_vars=['stat','math','eco'])

Anova1.columns=['Blocks','Subjects','Marks']
Anova1

[53]: Blocks Subjects Marks


0 0 stat 50
1 1 stat 47
2 2 stat 37
3 3 stat 24
4 0 math 40
5 1 math 30
6 2 math 12
7 3 math 50
8 0 eco 36
9 1 eco 32
10 2 eco 24
11 3 eco 47

[54]: Anova1_fit1=ols('Marks~C(Blocks)+C(Subjects)',data=Anova1).fit()
Anova1_fit1
Anova1_table=sm.stats.anova_lm(Anova1_fit1, typ=2)
Anova1_table

[54]: sum_sq df F PR(>F)


C(Blocks) 572.25 3.0 1.266740 0.367077
C(Subjects) 90.50 2.0 0.300498 0.750975
Residual 903.50 6.0 NaN NaN
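One way to read this table is to compare each F statistic with its critical value at
alpha = 0.05 (numerator df from the table, 6 residual df); a sketch:

# Sketch: critical F values for the Blocks and Subjects tests
print(scipy.stats.f.ppf(0.95, 3, 6))   # critical F for Blocks   (3, 6 df)
print(scipy.stats.f.ppf(0.95, 2, 6))   # critical F for Subjects (2, 6 df)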

[62]: Anova1_fit1.summary()

[62]: <class 'statsmodels.iolib.summary.Summary'>


"""
OLS Regression Results
==============================================================================
Dep. Variable: Marks R-squared: 0.423
Model: OLS Adj. R-squared: -0.058
Method: Least Squares F-statistic: 0.8802
Date: Sun, 24 Jan 2021 Prob (F-statistic): 0.546
Time: 23:07:13 Log-Likelihood: -42.955
No. Observations: 12 AIC: 97.91
Df Residuals: 6 BIC: 100.8
Df Model: 5
Covariance Type: nonrobust
=======================================================================================
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept              41.0000      8.677      4.725      0.003      19.768      62.232
C(Blocks)[T.1]         -5.6667     10.019     -0.566      0.592     -30.183      18.850
C(Blocks)[T.2]        -17.6667     10.019     -1.763      0.128     -42.183       6.850
C(Blocks)[T.3]         -1.6667     10.019     -0.166      0.873     -26.183      22.850
C(Subjects)[T.math]    -1.7500      8.677     -0.202      0.847     -22.982      19.482
C(Subjects)[T.stat]     4.7500      8.677      0.547      0.604     -16.482      25.982
==============================================================================
Omnibus: 2.503 Durbin-Watson: 2.431
Prob(Omnibus): 0.286 Jarque-Bera (JB): 1.061
Skew: -0.728 Prob(JB): 0.588
Kurtosis: 3.060 Cond. No. 5.35
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
"""

7 Two-way ANOVA with interaction
[89]: df45 = pd.DataFrame({'prepration': np.repeat(['3-hr', 'oneday', '10 week'], 6),
                           'College': np.tile(np.repeat(['Business', 'Engineering', 'Art and sci'], 2), 3),
                           'score': [500, 580, 540, 460, 480, 400, 460, 540, 560, 620,
                                     420, 480, 560, 600, 600, 580, 480, 410]})

[90]: df45

[90]: prepration College score


0 3-hr Business 500
1 3-hr Business 580
2 3-hr Engineering 540
3 3-hr Engineering 460
4 3-hr Art and sci 480
5 3-hr Art and sci 400
6 oneday Business 460
7 oneday Business 540
8 oneday Engineering 560
9 oneday Engineering 620
10 oneday Art and sci 420
11 oneday Art and sci 480
12 10 week Business 560
13 10 week Business 600
14 10 week Engineering 600
15 10 week Engineering 580
16 10 week Art and sci 480
17 10 week Art and sci 410

[92]: fit_model=ols('score~C(College)+C(prepration)+C(College):C(prepration)', data=df45).fit()

fit_model
fit_anova=anova_lm(fit_model, typ=2)
fit_anova

[92]: sum_sq df F PR(>F)


C(College) 45300.0 2.0 10.269521 0.004757
C(prepration) 6100.0 2.0 1.382872 0.299436
C(College):C(prepration) 11200.0 4.0 1.269521 0.350328
Residual 19850.0 9.0 NaN NaN
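Since College is significant while the interaction is not, an interaction plot is a useful
visual check; a sketch using statsmodels' plotting helper:

# Sketch: mean score for each College across the preparation levels
from statsmodels.graphics.factorplots import interaction_plot

fig = interaction_plot(x=df45['prepration'], trace=df45['College'],
                       response=df45['score'])
plt.show()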

[99]: fit_model.summary()

[99]: <class 'statsmodels.iolib.summary.Summary'>


"""
OLS Regression Results
==============================================================================
Dep. Variable: score R-squared: 0.759
Model: OLS Adj. R-squared: 0.545

Method: Least Squares F-statistic: 3.548
Date: Mon, 25 Jan 2021 Prob (F-statistic): 0.0384
Time: 00:04:47 Log-Likelihood: -88.591
No. Observations: 18 AIC: 195.2
Df Residuals: 9 BIC: 203.2
Df Model: 8
Covariance Type: nonrobust
=====================================================================================================================
                                                        coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------------------------
Intercept                                           445.0000     33.208     13.400      0.000     369.878     520.122
C(College)[T.Business]                              135.0000     46.963      2.875      0.018      28.762     241.238
C(College)[T.Engineering]                           145.0000     46.963      3.088      0.013      38.762     251.238
C(prepration)[T.3-hr]                                -5.0000     46.963     -0.106      0.918    -111.238     101.238
C(prepration)[T.oneday]                               5.0000     46.963      0.106      0.918    -101.238     111.238
C(College)[T.Business]:C(prepration)[T.3-hr]        -35.0000     66.416     -0.527      0.611    -185.244     115.244
C(College)[T.Engineering]:C(prepration)[T.3-hr]     -85.0000     66.416     -1.280      0.233    -235.244      65.244
C(College)[T.Business]:C(prepration)[T.oneday]      -85.0000     66.416     -1.280      0.233    -235.244      65.244
C(College)[T.Engineering]:C(prepration)[T.oneday]    -5.0000     66.416     -0.075      0.942    -155.244     145.244
==============================================================================
Omnibus: 16.430 Durbin-Watson: 2.984
Prob(Omnibus): 0.000 Jarque-Bera (JB): 2.333
Skew: 0.000 Prob(JB): 0.311
Kurtosis: 1.236 Cond. No. 13.9
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
"""
