hotelling1 <- function(x,mu0)
{
# vstup: datova matica x, vektor str.h. z nulovej hypotezy
dimx<-dim(x)
# vypocet vyberoveho priemeru a vyb.kov.matice
xmean <- apply(x,2,mean)
s <- var(x)
invs <- solve(s)
# vypocet testovacej statistiky
HT2 <- dimx[1]*t(as.matrix(xmean-mu0))%*%invs%*%as.matrix(xmean-mu0)
# konverzia na hodnotu F, vypocet stupnov volnosti a p-hodnoty
Fval <- (dimx[1]-dimx[2])*HT2/(dimx[2]*(dimx[1]-1))
df1 <- dimx[2]
df2 <- dimx[1]-dimx[2]
pval <- 1-pf(Fval, df1, df2)
cat("T-squared=", HT2, fill=TRUE)
df<-cbind(df1, df2)
cat("F-value=", Fval, " df=", df, " p-value=", pval, fill=TRUE)
}
Vygenerujeme 300 bodov z rozdelenia \(N_3(0,I_3)\) a pomocou Hotellingovho testu overíme, či \(\mu=0\).
x <- matrix(rnorm(300), nrow=100)
x <- as.data.frame(x)
head(x)
## V1 V2 V3
## 1 0.8527174 -0.41326671 0.04088872
## 2 -1.2470798 -1.04337782 1.16555694
## 3 -0.9342656 0.12209469 -0.82065004
## 4 -2.4596581 -0.11051475 0.11379101
## 5 -1.0034976 0.01359941 0.08919897
## 6 -1.0687092 -0.44829678 -0.39506678
mu0<-rep(0,3)
plotids <- plot3d(x[,1],x[,2],x[,3], type="p")
spheres3d(mu0[1],mu0[2],mu0[3], col="red", radius=0.2)
You must enable Javascript to view this page properly.
hotelling1(x,mu0)
## T-squared= 2.145906
## F-value= 0.7008515 df= 3 97 p-value= 0.5537867
mu0 <- c(2,3,1)
hotelling1(x,mu0)
## T-squared= 1595.039
## F-value= 520.9385 df= 3 97 p-value= 0
Merali sme hladiny minerálov v pote 20 športovcov. Namerané dáta sa dajú reprezentovať ako trojrozmerný vektor s premennými:
X1: miera potenia
X2: obsah sodika
X3: obsah draslika
Chceme otestovať \(H_0: \mu=(4, 50, 10)'\) na hladine \(\alpha=0.10\).
(data <- read.table("http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/sweat.dat",header=FALSE))
## V1 V2 V3
## 1 3.7 48.5 9.3
## 2 5.7 65.1 8.0
## 3 3.8 47.2 10.9
## 4 3.2 53.2 12.0
## 5 3.1 55.5 9.7
## 6 4.6 36.1 7.9
## 7 2.4 24.8 14.0
## 8 7.2 33.1 7.6
## 9 6.7 47.4 8.5
## 10 5.4 54.1 11.3
## 11 3.9 36.9 12.7
## 12 4.5 58.8 12.3
## 13 3.5 27.8 9.8
## 14 4.5 40.2 8.4
## 15 1.5 13.5 10.1
## 16 8.5 56.4 7.1
## 17 4.5 71.6 8.2
## 18 6.5 52.8 10.9
## 19 4.1 44.1 11.2
## 20 5.5 40.9 9.4
(mu <- colMeans(data))
## V1 V2 V3
## 4.640 45.400 9.965
(S <- var(data))
## V1 V2 V3
## V1 2.879368 10.0100 -1.809053
## V2 10.010000 199.7884 -5.640000
## V3 -1.809053 -5.6400 3.627658
plotids <- plot3d(data[,1],data[,2],data[,3], type="s", col="blue")
spheres3d(4,50,10, col="red", radius=1.5)
hotelling1(data, c(4,50,10))
## T-squared= 9.738773
## F-value= 2.904546 df= 3 17 p-value= 0.06492834
You must enable Javascript to view this page properly.
hotelling2<-function(x1,x2)
{
# vstup: datove matice x1,x2
p<-dim(x1)[2]
n1<-dim(x1)[1]
n2<-dim(x2)[1]
# vypocet vyberovych priemerov a vyb. kov. matic
m1<-apply(x1,2,mean)
s1<-var(x1)
m2<-apply(x2,2,mean)
s2<-var(x2)
# vazeny priemer vyb. kov. matic
s<-((n1-1)*s1+(n2-1)*s2)/(n1+n2-2)
# vypocet testovacej statistiky
T2<-((n1*n2)/(n1+n2))*t(m1-m2)%*%solve(s)%*%(m1-m2)
# konverzia na hodnotu F, vypocet stupnov volnosti a p-hodnoty
Fval<-(n1+n2-p-1)*T2/(p*(n1+n2-2))
df1<-p
df2<-n1+n2-p-1
pval<-1-pf(Fval,df1,df2)
cat("T-squared=",T2,fill=TRUE)
df<-cbind(df1,df2)
cat("F-value=",Fval," df=",df," p-value=",pval,fill=T)
}
Merieme vplyv dvoch rôznych teplôt spracovania ocele na jej tvárnosť a silu.
x<-matrix(scan("http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/steel.dat"), ncol=3,byrow=T)
x
## [,1] [,2] [,3]
## [1,] 1 33 60
## [2,] 1 36 61
## [3,] 1 35 64
## [4,] 1 38 63
## [5,] 1 40 65
## [6,] 2 35 57
## [7,] 2 36 59
## [8,] 2 38 59
## [9,] 2 39 61
## [10,] 2 41 63
## [11,] 2 43 65
## [12,] 2 41 59
p<-dim(x)[2]
x1<-x[x[,1]=="1",2:p]
x2<-x[x[,1]=="2",2:p]
hotelling2(x1,x2)
## T-squared= 23.91171
## F-value= 10.76027 df= 2 9 p-value= 0.004106073
Načítame dáta o odpadových vodách
wastewater<-read.table("http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/wastewater.txt",header=TRUE)
Vytvoríme dátový rámec s diferenciami
D<-data.frame(cbind(wastewater[,1]-wastewater[,3],wastewater[,2]-wastewater[,4]))
names(D)<-c("bod","ss")
Vypočítame hodnotu \(T^2\) štatistiky pre \(H_0:d=0\)
n<-dim(D)[1]
p<-dim(D)[2]
dbar<-c(mean(D[,1]),mean(D[,2]))
Sd<-cov(D)
Sdinv <- solve(Sd)
T2<-n*t(dbar)%*%Sdinv%*%dbar
T2
## [,1]
## [1,] 13.63931
f.t2=(p*(n-1)/(n-p))*qf(0.05,p,n-p,lower.tail=F)
f.t2
## [1] 9.458877
pval=pf(T2*(n-p)/((n-1)*p),p,n-p,lower.tail=F)
pval
## [,1]
## [1,] 0.02082779
Vykreslíme 95% elipsu spoľahlivosti pre vektor priemerných diferencií, vypočítame 95% simultánne a Bonferroniho IS a zakreslíme ich do elipsy
library(ellipse)
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
##
## pairs
plot(ellipse(Sd,centre=dbar,t=sqrt(((n-1)*p/(n*(n-p)))*qf(0.95,p,n-p))),type="l",xlim=c(-25,10),ylim=c(-10,35), col="blue")
points(dbar[1],dbar[2])
delta1.L=dbar[1]-sqrt(((n-1)*p/(n-p))*qf(0.95,p,n-p))*sqrt(Sd[1,1]/n)
delta1.U=dbar[1]+sqrt(((n-1)*p/(n-p))*qf(0.95,p,n-p))*sqrt(Sd[1,1]/n)
delta2.L=dbar[2]-sqrt(((n-1)*p/(n-p))*qf(0.95,p,n-p))*sqrt(Sd[2,2]/n)
delta2.U=dbar[2]+sqrt(((n-1)*p/(n-p))*qf(0.95,p,n-p))*sqrt(Sd[2,2]/n)
c(delta1.L,delta1.U)
## [1] -22.45327 3.72600
c(delta2.L,delta2.U)
## [1] -5.700119 32.245574
lines(c(delta1.L,delta1.L),c(-12,delta2.U),lty=2)
lines(c(delta1.U,delta1.U),c(-12,delta2.U),lty=2)
lines(c(-27,delta1.U),c(delta2.L,delta2.L),lty=2)
lines(c(-27,delta1.U),c(delta2.U,delta2.U),lty=2)
delta1.LB=dbar[1]-qt(0.05/(2*p),n-1,lower.tail=F)*sqrt(Sd[1,1]/n)
delta1.UB=dbar[1]+qt(0.05/(2*p),n-1,lower.tail=F)*sqrt(Sd[1,1]/n)
delta2.LB=dbar[2]-qt(0.05/(2*p),n-1,lower.tail=F)*sqrt(Sd[2,2]/n)
delta2.UB=dbar[2]+qt(0.05/(2*p),n-1,lower.tail=F)*sqrt(Sd[2,2]/n)
c(delta1.LB,delta1.UB)
## [1] -20.573107 1.845835
c(delta2.LB,delta2.UB)
## [1] -2.974903 29.520358
lines(c(delta1.LB,delta1.LB),c(-12,delta2.UB),lty=3)
lines(c(delta1.UB,delta1.UB),c(-12,delta2.UB),lty=3)
lines(c(-27,delta1.UB),c(delta2.LB,delta2.LB),lty=3)
lines(c(-27,delta1.UB),c(delta2.UB,delta2.UB),lty=3)
library(heplots)
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:ellipse':
##
## ellipse
##
## Attaching package: 'heplots'
## The following object is masked from 'package:rgl':
##
## arrow3d
data(iris)
Elipsoidy koncentrácie (necentrované a centrované)
covEllipses(iris[,1:4], iris$Species, fill=c(rep(FALSE,3), TRUE), variables=1:4,fill.alpha=.1)
covEllipses(iris[,1:4], iris$Species, center=TRUE,fill=c(rep(FALSE,3), TRUE), variables=1:4,label.pos=c(1:3,0), fill.alpha=.1)
Jednorozmerný Bartlettov test
bartlett.test(iris[,1:4],iris[,5])
##
## Bartlett test of homogeneity of variances
##
## data: iris[, 1:4]
## Bartlett's K-squared = 294.19, df = 3, p-value < 2.2e-16
Boxov M test
res <- boxM(iris[, 1:4], iris[, "Species"])
res
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: iris[, 1:4]
## Chi-Sq (approx.) = 140.94, df = 20, p-value < 2.2e-16
plot(res)
Boxploty
op <- par(mfrow=c(1, 4), mar=c(5,4,1,1))
for (response in names(iris)[1:4]){
Boxplot(iris[, response] ~ Species, data=iris,
ylab=response, axes=FALSE, col=c("red", "blue", "gray"))
box()
axis(2)
axis(1, at=1:3, labels=c("Setosa", "Vers.", "Virginca"))
}
par(op)
Cieľom kontroly kvality je odhaliť výskyt variácií v kvalite produktu, ktoré nie sú dôsledkom štandardných príčin (mierna nepresnosť prístroja, variabilita materiálu…). Výsledky takejto kontroly sa zaznamenávajú do rôznych control charts.
\(\bar{X}\) chart pozostáva z dát zakreslených v časovom poradí a horizontálnych čiar, ktoré označujú očakávanú variáciu. Presnejšie
zakrelíme dáta v časovom poradí
zakreslíme priemer všetkých pozorovaní \(\bar{x}\)
vypočítame kontrolné limity \(UCL=\bar{x}+3s,\ LCL= \bar{x}-3s\)
Uvažujme dáta, ktoré vznikli výberom 20 vzoriek výrobkov, pričom každá vzorka obsahovala 4 výrobky.
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
X1 <- matrix(c(72, 56, 55, 44, 97, 83, 47, 88, 57, 26, 46,
49, 71, 71, 67, 55, 49, 72, 61, 35, 84, 87, 73, 80, 26, 89, 66,
50, 47, 39, 27, 62, 63, 58, 69, 63, 51, 80, 74, 38, 79, 33, 22,
54, 48, 91, 53, 84, 41, 52, 63, 78, 82, 69, 70, 72, 55, 61, 62,
41, 49, 42, 60, 74, 58, 62, 58, 69, 46, 48, 34, 87, 55, 70, 94,
49, 76, 59, 57, 46), ncol = 4)
X1
## [,1] [,2] [,3] [,4]
## [1,] 72 84 79 49
## [2,] 56 87 33 42
## [3,] 55 73 22 60
## [4,] 44 80 54 74
## [5,] 97 26 48 58
## [6,] 83 89 91 62
## [7,] 47 66 53 58
## [8,] 88 50 84 69
## [9,] 57 47 41 46
## [10,] 26 39 52 48
## [11,] 46 27 63 34
## [12,] 49 62 78 87
## [13,] 71 63 82 55
## [14,] 71 58 69 70
## [15,] 67 69 70 94
## [16,] 55 63 72 49
## [17,] 49 51 55 76
## [18,] 72 80 61 59
## [19,] 61 74 62 57
## [20,] 35 38 41 46
q1 <- qcc(X1, type = "xbar")
Nech \(\mathbf X_1,\ldots,\mathbf X_n\) sú nezávislé rovnako rozdelené z rozdelenia \(N_p(\mu,\Sigma)\). Pre j-te pozorovanie vypočítame hodnotu \(T^2\) štatistiky \[ T^2_j=(\mathbf x_j-\bar{\mathbf x})'S^{-1}(\mathbf x_j-\bar{\mathbf x}) \] a zakreslíme tieto hodnoty na časovú os. Dolný kontrolný limit je nula a horný kontrolný limit je \(UCL=\chi^2_p(0.05)\).
Predpokladajme, že v predchádzajúcom príklade sme pri každom výrobku merali dve premenné (napríklad okrem vonkajšieho aj vnútorný polomer výrobku).
X2 <- matrix(c(23, 14, 13, 9, 36, 30, 12, 31, 14, 7, 10,
11, 22, 21, 18, 15, 13, 22, 19, 10, 30, 31, 22, 28, 10, 35, 18,
11, 10, 11, 8, 20, 16, 19, 19, 16, 14, 28, 20, 11, 28, 8, 6,
15, 14, 36, 14, 30, 8, 35, 19, 27, 31, 17, 18, 20, 16, 18, 16,
13, 10, 9, 16, 25, 15, 18, 16, 19, 10, 30, 9, 31, 15, 20, 35,
12, 26, 17, 14, 16), ncol = 4)
X <- list(X1 = X1, X2 = X2)
X
## $X1
## [,1] [,2] [,3] [,4]
## [1,] 72 84 79 49
## [2,] 56 87 33 42
## [3,] 55 73 22 60
## [4,] 44 80 54 74
## [5,] 97 26 48 58
## [6,] 83 89 91 62
## [7,] 47 66 53 58
## [8,] 88 50 84 69
## [9,] 57 47 41 46
## [10,] 26 39 52 48
## [11,] 46 27 63 34
## [12,] 49 62 78 87
## [13,] 71 63 82 55
## [14,] 71 58 69 70
## [15,] 67 69 70 94
## [16,] 55 63 72 49
## [17,] 49 51 55 76
## [18,] 72 80 61 59
## [19,] 61 74 62 57
## [20,] 35 38 41 46
##
## $X2
## [,1] [,2] [,3] [,4]
## [1,] 23 30 28 10
## [2,] 14 31 8 9
## [3,] 13 22 6 16
## [4,] 9 28 15 25
## [5,] 36 10 14 15
## [6,] 30 35 36 18
## [7,] 12 18 14 16
## [8,] 31 11 30 19
## [9,] 14 10 8 10
## [10,] 7 11 35 30
## [11,] 10 8 19 9
## [12,] 11 20 27 31
## [13,] 22 16 31 15
## [14,] 21 19 17 20
## [15,] 18 19 18 35
## [16,] 15 16 20 12
## [17,] 13 14 16 26
## [18,] 22 28 18 17
## [19,] 19 20 16 14
## [20,] 10 11 13 16
q <- mqcc(X, type = "T2")
Nech \(\mathbf X_1,\ldots,\mathbf X_n\) sú nezávislé rovnako rozdelené z rozdelenia \(N_p(\mu,\Sigma)\). Potom \(E(\mathbf X_j-\bar{\mathbf X})=0\) a \(Var(\mathbf X_j-\bar{\mathbf X})=\frac{n-1}{n}\Sigma\). Vieme, že \(\mathbf X_j-\bar{\mathbf X}\) má normálne rozdelenie pre všetky \(j\), ale nie je nezávislé od \(S\). Budeme ale aproximovať \((\mathbf X_j-\bar{\mathbf X})'S^{-1}(\mathbf X_j-\bar{\mathbf X})\) pomocou \(\chi^2\) rozdelenia.
Ellipse control chart je založený na tom, že 95% elipsoid pozostáva z takých \(\mathbf x\), pre ktoré platí \[ (\mathbf x-\bar{\mathbf x})'S^{-1}(\mathbf x-\bar{\mathbf x})\leq \chi^2_2(0.05) \]
ellipseChart(q)
ellipseChart(q, show.id = TRUE)