Jednofaktorová analýza rozptylu

Farmaceutická spoločnosť testuje 5 rôznych liekov na migrénu na 56 pacientoch, ktorí užili liek pri najbližšej migréne a zaznamenali následnú zmenu intenzity bolesti na škále od 1 do 5.

an1 <- read.table("http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/anova1.csv", header=TRUE)
an1$drug<-factor(an1$drug)

summary(an1)
##       pain       drug  
##  Min.   :1.500   1:13  
##  1st Qu.:2.375   2:13  
##  Median :2.950   3:13  
##  Mean   :2.846   4:12  
##  3rd Qu.:3.325   5: 5  
##  Max.   :4.100

Sumárne štatistiky podľa faktorov

with(an1,tapply(pain,drug,summary))
## $`1`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.500   2.100   2.500   2.485   2.800   3.400 
## 
## $`2`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.600   2.900   3.000   3.054   3.100   3.800 
## 
## $`3`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.600   1.900   2.400   2.485   3.000   3.500 
## 
## $`4`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.600   3.250   3.550   3.308   3.750   4.100 
## 
## $`5`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.20    2.70    2.90    3.08    3.60    4.00
with(an1,tapply(pain,drug,length))
##  1  2  3  4  5 
## 13 13 13 12  5

Stripchart a boxplot

attach(an1)
par(mfrow=c(1,2))
stripchart(pain~drug,method="stack",vertical=T,xlab="drug",ylab="pain",col=rainbow(5),pch=19)
plot(pain~drug,ylab="pain",xlab="drug")

Konštrukcia modelu

Nastavenie základného ošetrenia a typu kontrastu

contr<-contr.treatment(5,base=5)
an1$drug<-C(an1$drug,contr)

Fitovanie modelu

(tc.mod<-aov(pain~drug,an1))
## Call:
##    aov(formula = pain ~ drug, data = an1)
## 
## Terms:
##                      drug Residuals
## Sum of Squares   6.795965 18.783321
## Deg. of Freedom         4        51
## 
## Residual standard error: 0.6068776
## Estimated effects may be unbalanced
(tc.sum<-summary.lm(tc.mod))
## 
## Call:
## aov(formula = pain ~ drug, data = an1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.70833 -0.38115  0.01538  0.39760  1.01538 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.08000    0.27140  11.348 1.45e-15 ***
## drug1       -0.59538    0.31936  -1.864    0.068 .  
## drug2       -0.02615    0.31936  -0.082    0.935    
## drug3       -0.59538    0.31936  -1.864    0.068 .  
## drug4        0.22833    0.32304   0.707    0.483    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6069 on 51 degrees of freedom
## Multiple R-squared:  0.2657, Adjusted R-squared:  0.2081 
## F-statistic: 4.613 on 4 and 51 DF,  p-value: 0.002956

ANOVA tabuľka

summary(tc.mod)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## drug         4  6.796  1.6990   4.613 0.00296 **
## Residuals   51 18.783  0.3683                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testovanie predpokladov

Predpoklad konštantných variancií

y.hat<-fitted.values(tc.mod)
e<-residuals(tc.mod)

par(mfrow=c(1,2))
plot(e~y.hat,xlab="Fitted values",ylab="Residuals",pch=19,col="red")
stripchart(e~drug,an1,method="jitter",vertical=T,ylab="Residuals",xlab="treatments",pch=19,col=rainbow(5))

bartlett.test(pain ~ drug, data=an1)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  pain by drug
## Bartlett's K-squared = 8.9646, df = 4, p-value = 0.06199

Predpoklad normality (Shapiro-Wilkov test normality)

qqnorm(e)

shapiro.test(e)
## 
##  Shapiro-Wilk normality test
## 
## data:  e
## W = 0.96978, p-value = 0.1717

Párové porovnania efektov ošetrení

S kontrolným ošetrením

tc.sum
## 
## Call:
## aov(formula = pain ~ drug, data = an1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.70833 -0.38115  0.01538  0.39760  1.01538 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.08000    0.27140  11.348 1.45e-15 ***
## drug1       -0.59538    0.31936  -1.864    0.068 .  
## drug2       -0.02615    0.31936  -0.082    0.935    
## drug3       -0.59538    0.31936  -1.864    0.068 .  
## drug4        0.22833    0.32304   0.707    0.483    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6069 on 51 degrees of freedom
## Multiple R-squared:  0.2657, Adjusted R-squared:  0.2081 
## F-statistic: 4.613 on 4 and 51 DF,  p-value: 0.002956

Bez kontrolného ošetrenia

Simultánne párové t-testy

with(an1,pairwise.t.test(pain,drug,p.adj="bonf"))
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  pain and drug 
## 
##   1     2     3     4    
## 2 0.205 -     -     -    
## 3 1.000 0.205 -     -    
## 4 0.014 1.000 0.014 -    
## 5 0.680 1.000 0.680 1.000
## 
## P value adjustment method: bonferroni

Tukey

TukeyHSD(tc.mod,ordered=T)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
##     factor levels have been ordered
## 
## Fit: aov(formula = pain ~ drug, data = an1)
## 
## $drug
##             diff        lwr       upr     p adj
## 1-3 4.440892e-16 -0.6731112 0.6731112 1.0000000
## 2-3 5.692308e-01 -0.1038804 1.2423420 0.1342564
## 5-3 5.953846e-01 -0.3076888 1.4984581 0.3493707
## 4-3 8.237179e-01  0.1367267 1.5107092 0.0113278
## 2-1 5.692308e-01 -0.1038804 1.2423420 0.1342564
## 5-1 5.953846e-01 -0.3076888 1.4984581 0.3493707
## 4-1 8.237179e-01  0.1367267 1.5107092 0.0113278
## 5-2 2.615385e-02 -0.8769196 0.9292273 0.9999894
## 4-2 2.544872e-01 -0.4325041 0.9414784 0.8319478
## 4-5 2.283333e-01 -0.6851326 1.1417992 0.9540102

Testovanie všeobecných kontrastov

Definovanie kontrastov

library(heplots)
## Warning: package 'heplots' was built under R version 3.6.1
## Loading required package: car
## Loading required package: carData
contr<-c(1,-.25,-.25,-.25,-.25)
lht(tc.mod, contr,verbose=TRUE)
## 
## Hypothesis matrix:
##      [,1]  [,2]  [,3]  [,4]  [,5]
## [1,]    1 -0.25 -0.25 -0.25 -0.25
## 
## Right-hand-side vector:
## [1] 0
## 
## Estimated linear function (hypothesis.matrix %*% coef - rhs)
## [1] 3.327147
## 
## 
## Estimated variance of linear function
## [1] 0.3018706
## Linear hypothesis test
## 
## Hypothesis:
## (Intercept) - 0.25 drug1 - 0.25 drug2 - 0.25 drug3 - 0.25 drug4 = 0
## 
## Model 1: restricted model
## Model 2: pain ~ drug
## 
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     52 32.289                                  
## 2     51 18.783  1    13.506 36.671 1.678e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Dvojfaktorová analýza rozptylu

Skúmame účinok 4 značiek samoopaľovacích krémov v 3 konzistenciách (gel, krém, roztok). Máme teda dva faktory:

Pozorovanie y: zmena farby pokožky

an2 <- read.table("http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/anova2.csv", header=TRUE)
an2$A<-factor(an2$A)
an2$B<-factor(an2$B)
summary(an2)
##        y          A      B     
##  Min.   : 5.670   1:14   1:13  
##  1st Qu.: 6.910   2:19   2:11  
##  Median : 7.500   3:14   3:11  
##  Mean   : 7.950          4:12  
##  3rd Qu.: 9.065                
##  Max.   :11.050
attach(an2)
par(mfrow=c(1,2))
interaction.plot(A,B,y,col=rainbow(4),lwd=2)
interaction.plot(B,A,y,col=rainbow(3),lwd=2)

par(mfrow=c(1,1))
par(mfrow=c(2,2))
stripchart(y~A,xlab="y",method="jitter",ylab="A",las=1)
stripchart(y~B,xlab="y",method="jitter",ylab="B",las=1)
plot(y~A,ylab="y",xlab="A",horizontal=T,las=1)
plot(y~B,ylab="y",xlab="B",horizontal=T,las=1)

par(mfrow=c(1,1))
detach(an2)

Konštrukcia modelu

Model s interakciami

summary(aov(y~A+B+A:B,an2))
##             Df Sum Sq Mean Sq  F value Pr(>F)    
## A            2  21.43  10.714  950.500 <2e-16 ***
## B            3  76.26  25.418 2254.991 <2e-16 ***
## A:B          6   0.09   0.014    1.259  0.301    
## Residuals   35   0.39   0.011                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model bez interakcií

an2.mod<-aov(y~A+B,an2)
an2.sum<-summary.lm(an2.mod)
summary(an2.mod)
##             Df Sum Sq Mean Sq F value Pr(>F)    
## A            2  21.43  10.714   915.9 <2e-16 ***
## B            3  76.26  25.418  2172.8 <2e-16 ***
## Residuals   41   0.48   0.012                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Párové porovnania efektov ošetrení

  1. s kontrolným ošetrením
an2.sum$coefficients
##              Estimate Std. Error   t value     Pr(>|t|)
## (Intercept)  7.439154 0.04058959 183.27737 2.274567e-61
## A2          -1.584498 0.03814345 -41.54051 3.921245e-35
## A3          -1.171834 0.04174978 -28.06802 2.173160e-28
## B2           3.563391 0.04483658  79.47508 1.546087e-46
## B3           1.601406 0.04541072  35.26494 2.702871e-32
## B4           1.141513 0.04394667  25.97495 4.417804e-27
  1. bez kontrolného ošetrenia

Tukey

TukeyHSD(aov(y~A+B,an2),"A",ordered=T)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
##     factor levels have been ordered
## 
## Fit: aov(formula = y ~ A + B, data = an2)
## 
## $A
##            diff         lwr       upr     p adj
## 2-3 0.009924812 -0.08271182 0.1025614 0.9633107
## 1-3 1.482142857  1.38273572 1.5815500 0.0000000
## 1-2 1.472218045  1.37958142 1.5648547 0.0000000
TukeyHSD(aov(y~B+A,an2),"B",ordered=T)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
##     factor levels have been ordered
## 
## Fit: aov(formula = y ~ B + A, data = an2)
## 
## $B
##          diff       lwr      upr p adj
## 4-1 1.2167308 1.1007938 1.332668     0
## 3-1 1.6965035 1.5778578 1.815149     0
## 2-1 3.5519580 3.4333124 3.670604     0
## 3-4 0.4797727 0.3588825 0.600663     0
## 2-4 2.3352273 2.2143370 2.456118     0
## 2-3 1.8554545 1.7319642 1.978945     0

Iné usporiadanie

TukeyHSD(aov(y~A+B,an2),ordered=T)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
##     factor levels have been ordered
## 
## Fit: aov(formula = y ~ A + B, data = an2)
## 
## $A
##            diff         lwr       upr     p adj
## 2-3 0.009924812 -0.08271182 0.1025614 0.9633107
## 1-3 1.482142857  1.38273572 1.5815500 0.0000000
## 1-2 1.472218045  1.37958142 1.5648547 0.0000000
## 
## $B
##          diff       lwr       upr p adj
## 4-1 1.0636346 0.9476976 1.1795715     0
## 3-1 1.4981179 1.3794723 1.6167636     0
## 2-1 3.4883127 3.3696671 3.6069584     0
## 3-4 0.4344834 0.3135931 0.5553736     0
## 2-4 2.4246782 2.3037879 2.5455684     0
## 2-3 1.9901948 1.8667045 2.1136851     0
TukeyHSD(aov(y~B+A,an2),ordered=T)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
##     factor levels have been ordered
## 
## Fit: aov(formula = y ~ B + A, data = an2)
## 
## $B
##          diff       lwr      upr p adj
## 4-1 1.2167308 1.1007938 1.332668     0
## 3-1 1.6965035 1.5778578 1.815149     0
## 2-1 3.5519580 3.4333124 3.670604     0
## 3-4 0.4797727 0.3588825 0.600663     0
## 2-4 2.3352273 2.2143370 2.456118     0
## 2-3 1.8554545 1.7319642 1.978945     0
## 
## $A
##          diff       lwr       upr p adj
## 3-2 0.4272221 0.3345854 0.5198587     0
## 1-2 1.5800979 1.4874613 1.6727346     0
## 1-3 1.1528759 1.0534687 1.2522830     0

Model s interakciami

anI <- read.table("http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/anovaI.csv", header=TRUE)
anI$A<-factor(anI$A)
anI$B<-factor(anI$B)
attach(anI)
par(mfrow=c(1,2))
interaction.plot(A,B,y,col=rainbow(4),lwd=2)
interaction.plot(B,A,y,col=rainbow(4),lwd=2)

par(mfrow=c(1,1))

Test efektov interakcií

summary(aov(y~A+B+A:B,anI))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## A            2  0.550   0.275   7.352  0.00261 ** 
## B            3 12.074   4.025 107.643 8.09e-16 ***
## A:B          6  1.156   0.193   5.155  0.00103 ** 
## Residuals   29  1.084   0.037                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Vytvoríme premennú AB, kde uložíme hodnoty ij, i=1,2,3, j=1,2,3,4

AB<-character(length(y))
for (i in 1:length(y)){AB[i]<-paste(A[i],B[i],sep="")}

IntMod.dat<-data.frame(y,AB); rm(AB)
(IntMod.dat$AB<-factor(IntMod.dat$AB))
##  [1] 11 11 11 11 12 12 13 13 13 14 14 14 21 21 21 21 21 22 22 23 23 23 23
## [24] 24 24 31 31 31 31 31 32 32 33 33 33 34 34 34 34 34 34
## Levels: 11 12 13 14 21 22 23 24 31 32 33 34
detach(anI)

Otestujeme efekty ošetrení pre nový model

summary(aov(y~AB,IntMod.dat))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## AB          11 13.780  1.2527   33.51 1.65e-13 ***
## Residuals   29  1.084  0.0374                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tukeyho porovnania

(tests<-TukeyHSD(aov(y~AB,IntMod.dat)))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = y ~ AB, data = IntMod.dat)
## 
## $AB
##             diff         lwr          upr     p adj
## 12-11 -0.3600000 -0.95372820  0.233728200 0.5945554
## 13-11  1.0266667  0.50304761  1.550285722 0.0000070
## 14-11  0.9100000  0.38638095  1.433619055 0.0000570
## 21-11 -0.2180000 -0.67789989  0.241899886 0.8626670
## 22-11 -0.8750000 -1.46872820 -0.281271800 0.0007023
## 23-11  1.2075000  0.72272295  1.692277046 0.0000001
## 24-11  0.4050000 -0.18872820  0.998728200 0.4254935
## 31-11 -0.2640000 -0.72389989  0.195899886 0.6677741
## 32-11 -0.2500000 -0.84372820  0.343728200 0.9308383
## 33-11  0.6100000  0.08638095  1.133619055 0.0122496
## 34-11  0.3900000 -0.05253887  0.832538872 0.1244177
## 13-12  1.3866667  0.76082219  2.012511141 0.0000007
## 14-12  1.2700000  0.64415553  1.895844475 0.0000037
## 21-12  0.1420000 -0.43159594  0.715595936 0.9988726
## 22-12 -0.5150000 -1.20057827  0.170578273 0.2925943
## 23-12  1.5675000  0.97377180  2.161228200 0.0000000
## 24-12  0.7650000  0.07942173  1.450578273 0.0188670
## 31-12  0.0960000 -0.47759594  0.669595936 0.9999733
## 32-12  0.1100000 -0.57557827  0.795578273 0.9999825
## 33-12  0.9700000  0.34415553  1.595844475 0.0003405
## 34-12  0.7500000  0.19022768  1.309772316 0.0024791
## 14-13 -0.1166667 -0.67643898  0.443105649 0.9997712
## 21-13 -1.2446667 -1.74534225 -0.743991087 0.0000001
## 22-13 -1.9016667 -2.52751114 -1.275822192 0.0000000
## 23-13  0.1808333 -0.34278572  0.704452388 0.9822420
## 24-13 -0.6216667 -1.24751114  0.004177808 0.0527686
## 31-13 -1.2906667 -1.79134225 -0.789991087 0.0000000
## 32-13 -1.2766667 -1.90251114 -0.650822192 0.0000034
## 33-13 -0.4166667 -0.97643898  0.143105649 0.3044771
## 34-13 -0.6366667 -1.12144371 -0.151889621 0.0031731
## 21-14 -1.1280000 -1.62867558 -0.627324420 0.0000005
## 22-14 -1.7850000 -2.41084447 -1.159155525 0.0000000
## 23-14  0.2975000 -0.22611905  0.821119055 0.6808366
## 24-14 -0.5050000 -1.13084447  0.120844475 0.2073045
## 31-14 -1.1740000 -1.67467558 -0.673324420 0.0000002
## 32-14 -1.1600000 -1.78584447 -0.534155525 0.0000191
## 33-14 -0.3000000 -0.85977232  0.259772316 0.7498490
## 34-14 -0.5200000 -1.00477705 -0.035222954 0.0273506
## 22-21 -0.6570000 -1.23059594 -0.083404064 0.0145651
## 23-21  1.4255000  0.96560011  1.885399886 0.0000000
## 24-21  0.6230000  0.04940406  1.196595936 0.0243816
## 31-21 -0.0460000 -0.47959777  0.387597771 0.9999998
## 32-21 -0.0320000 -0.60559594  0.541595936 1.0000000
## 33-21  0.8280000  0.32732442  1.328675580 0.0001267
## 34-21  0.6080000  0.19286174  1.023138260 0.0007660
## 23-22  2.0825000  1.48877180  2.676228200 0.0000000
## 24-22  1.2800000  0.59442173  1.965578273 0.0000168
## 31-22  0.6110000  0.03740406  1.184595936 0.0291334
## 32-22  0.6250000 -0.06057827  1.310578273 0.0995578
## 33-22  1.4850000  0.85915553  2.110844475 0.0000002
## 34-22  1.2650000  0.70522768  1.824772316 0.0000005
## 24-23 -0.8025000 -1.39622820 -0.208771800 0.0022204
## 31-23 -1.4715000 -1.93139989 -1.011600114 0.0000000
## 32-23 -1.4575000 -2.05122820 -0.863771800 0.0000001
## 33-23 -0.5975000 -1.12111905 -0.073880945 0.0151283
## 34-23 -0.8175000 -1.26003887 -0.374961128 0.0000203
## 31-24 -0.6690000 -1.24259594 -0.095404064 0.0121027
## 32-24 -0.6550000 -1.34057827  0.030578273 0.0712777
## 33-24  0.2050000 -0.42084447  0.830844475 0.9881260
## 34-24 -0.0150000 -0.57477232  0.544772316 1.0000000
## 32-31  0.0140000 -0.55959594  0.587595936 1.0000000
## 33-31  0.8740000  0.37332442  1.374675580 0.0000529
## 34-31  0.6540000  0.23886174  1.069138260 0.0002672
## 33-32  0.8600000  0.23415553  1.485844475 0.0017982
## 34-32  0.6400000  0.08022768  1.199772316 0.0148349
## 34-33 -0.2200000 -0.70477705  0.264777046 0.8920072
plot(tests)

Úloha

Na otestovanie hypotézy, že ľudia rôznych vekových kategórií majú rôzny hudobný vkus sme vybrali 45 ľudí vo veku do 40 rokov a 45 ľudí starších ako 40 rokov. Každú z týchto skupín sme ďalej rozdelili do troch podskupín po 15 ludi, ktorí museli hodinu počúvať Senzus (1.skupina), Madonnu (2.skupina) alebo Avenged Sevenfold (3.skupina). Po hodine sme každú osobu požiadali o hodnotenie vypočutej hudby na škále od -100 (vzdám sa polovice majetku, len aby som to už nemusel počúvať) po 100 (pokojne zvládnem ešte ďalšie dve hodiny).

V súbore http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/music.dat sú uvedené merania nasledovných premenných:

  • music: druh hudby

  • age: vek (1: do 40 rokov, 2: nad 40 rokov)

  • liking: hodnotenie vypočutej hudby

Vytvorte model analýzy rozptylu a otestujte hypotézu o významnosti interakcií medzi vekom a hudobným vkusom. Vypočítajte Tukeyho intervaly spoľahlivosti pre rozdiely priemerov medzi jednotlivými kombináciami úrovní faktorov.