# Exercises in multivariate statistical analysis 2
# Radoslav Harman, FMFI UK, Bratislava

# Introductory example: Lamos, Potocky, p. 289
# The data and the analysis are based on part 7 of the book
# Everitt, B.: RSPCMA

# ---- Part 1: example with given means and covariance matrices ----

# Group means and group covariance matrices (three variables).
m1 <- c(54, 49, 52)
m2 <- c(31, 30, 31)
S1 <- matrix(c(24, 18, 16, 18, 20, 15, 16, 15, 22), ncol = 3)
S2 <- matrix(c(23, 17, 14, 17, 19, 12, 14, 12, 23), ncol = 3)

# q1, q2 and C12, C21 enter the computation only through the ratio k below
# (presumably prior probabilities and misclassification costs -- confirm
# against the assignment).
q1 <- 1 / 3
q2 <- 2 / 3
C12 <- 2
C21 <- 3

# Plain average of the two covariance matrices: according to the
# assignment n1 = n2.
S <- (S1 + S2) / 2
S_inv <- solve(S) # inverted once and reused below

# Coefficient vector a, constant b and the threshold log(k) - b.
a <- S_inv %*% (m1 - m2)
a
b <- 0.5 * (t(m2) %*% S_inv %*% m2 - t(m1) %*% S_inv %*% m1)
k <- q2 * C12 / q1 / C21
log(k) - b

# alpha is the quadratic form (m1 - m2)' S^{-1} (m1 - m2).
alpha <- t(m1 - m2) %*% S_inv %*% (m1 - m2)
1 - pnorm((log(k) + alpha / 2) / sqrt(alpha)) # estimate of P(1|2)
pnorm((log(k) - alpha / 2) / sqrt(alpha)) # estimate of P(2|1)

# Classification error if we decided on the basis of x1 alone.
# The cut-off is the midpoint of the two x1 means (42.5 here) and the
# variance is the first diagonal element of S (23.5 here).
cut_x1 <- (m1[1] + m2[1]) / 2
sd_x1 <- sqrt(S[1, 1])
pnorm(cut_x1, mean = m1[1], sd = sd_x1)
1 - pnorm(cut_x1, mean = m2[1], sd = sd_x1)
# Most of the misclassification estimates cannot be computed here, because
# the example comes without raw data.

# ---- Part 2: Everitt, the Tibet data ----

# Load the Tibet data. Adjust the path to where the file is stored.
tibet_path <- "c:/tibet.txt"
Tibet <- read.table(tibet_path, header = TRUE)

# Column 6 is excluded from every numeric computation below and is compared
# with the predicted class later on (presumably it is the Type column --
# confirm against the file). Type is always referenced as Tibet$Type; the
# data frame is deliberately not attach()ed.

# Scores on the first two principal components, used only as plot axes.
pc_scores <- prcomp(Tibet[, -6])$x
x <- pc_scores[, 1]
y <- pc_scores[, 2]
plot(x, y, col = as.numeric(Tibet$Type), pch = 19, cex = 2)

# Split the measurements by group; group means, sizes and dimension.
x1 <- Tibet[Tibet$Type == 1, -6]
x2 <- Tibet[Tibet$Type == 2, -6]
m1 <- colMeans(x1)
m1
m2 <- colMeans(x2)
m2
l1 <- nrow(x1)
l2 <- nrow(x2)
p <- ncol(x1) # number of measured variables

# Pooled covariance matrix and its inverse (computed once, reused below).
S123 <- ((l1 - 1) * var(x1) + (l2 - 1) * var(x2)) / (l1 + l2 - 2)
S123_inv <- solve(S123)
d <- m1 - m2

# Test of equality of the mean vectors: two-sample T^2 statistic converted
# to an F statistic with p and l1 + l2 - p - 1 degrees of freedom.
T2 <- (l1 * l2) / (l1 + l2) * t(d) %*% S123_inv %*% d
Fstat <- (l1 + l2 - p - 1) * T2 / ((l1 + l2 - 2) * p)
pvalue <- 1 - pf(Fstat, p, l1 + l2 - p - 1)

# Computation of the linear discriminant function: coefficients a and the
# cut-off z12, the average of the two group means projected on a.
a <- S123_inv %*% d
a
z12 <- (m1 %*% a + m2 %*% a) / 2
z12
# This is the same as the formula from the lecture, look
# (b is the same quantity as z12 with the opposite sign):
b <- 0.5 * (t(m2) %*% S123_inv %*% m2 - t(m1) %*% S123_inv %*% m1)
b

# The same analysis with MASS::lda for comparison.
library(MASS)
if (interactive()) {
  help(lda) # open the help page only in an interactive session
}
dis <- lda(
  Type ~ Length + Breadth + Height + Fheight + Fbreadth,
  data = Tibet,
  prior = c(1 / 2, 1 / 2)
)
dis$means
dis$scaling
dis$scaling / a # element-wise ratio of lda's scaling to the coefficients a
t(dis$scaling) %*% (m1 + m2) / 2

# Resubstitution; display.
# cls is 1 where the discriminant score exceeds z12 and 2 where it is below.
cls <- 1.5 - sign(as.matrix(Tibet[, -6]) %*% a - z12[1, 1]) / 2
# pch is 19 where cls equals column 6 and 21 where they differ by one.
plot(
  x, y,
  col = as.numeric(Tibet$Type),
  pch = 19 + 2 * abs(cls - Tibet[, 6]),
  cex = 2
)
# The estimated misclassification probabilities are therefore 3/17, 3/15.

alpha <- t(d) %*% S123_inv %*% d
k <- 1
1 - pnorm((log(k) + alpha / 2) / sqrt(alpha)) # estimate of P(1|2)
pnorm((log(k) - alpha / 2) / sqrt(alpha)) # estimate of P(2|1)
# The estimates of P(1|2) and P(2|1) based on the Mahalanobis distance are
# equal; note that, by the formulas, this is always the case when k = 1.