# Predmet: "Analyza zhlukov a klasifikacia dat" # Studijny program: "Pravdepodobnost a matematicka statistika" # Vyucujuci: Radoslav Harman, KAMS FMFI UK Bratislava # # Kanonicke korelacie # Data headsize su z Everit, BS: An R and S-PLUS Companion to Multivariate Analysis, Springer 2005 # Data BRIAN maju neznamy povod (treba ich brat s rezervou), vid: https://dasl.datadescription.com/datafile/brain-size/ ## Priklad na kanonicke korelacie ak p=2 aj q=2 # Komentar od Everitta: # To begin we shall illustrate the application of canonical correlation analysis on a # data set reported over 80 years ago by Frets (1921). The data [...] # give head measurements (in millimeters) for each of the first two adult sons # in 25 families. Here the family is the “individual” in our data set and the four # head measurements are the variables. The question that was of interest to Frets was # whether there is a relationship between the head measurements for pairs of sons? headsize <- read.table("http://www.iam.fmph.uniba.sk/ospm/Harman/data/headsize.txt", header=TRUE) print(headsize) plot(headsize) cor(headsize) headsize.std <- sweep(headsize, 2 , sqrt(apply(headsize,2,var)), FUN="/") # Poznamka: urobili sme normalizaciu na jednotkovy rozptyl, ale inak hodnota samotnych # kanonickych korelacii je invariantna vzhladom na linearnu transformaciu dat. S <- cov(headsize.std) S11 <- S[1:2, 1:2] S12 <- S[1:2, 3:4] S21 <- t(S12) S22 <- S[3:4,3:4] H1 <- solve(S11) %*% S12 %*% solve(S22) %*% S21 H2 <- solve(S22) %*% S21 %*% solve(S11) %*% S12 eigen(H1) eigen(H2) help(cancor) head.cc <- cancor(headsize.std[, 1:2], headsize.std[, 3:4]) head.cc A <- head.cc$xcoef B <- head.cc$ycoef attach(headsize.std) girth1 <- A[1,1] * L1 + A[2,1] * B1 girth2 <- B[1,1] * L2 + B[2,1] * B2 shape1 <- A[1,2] * L1 + A[2,2] * B1 shape2 <- B[1,2] * L2 + B[2,2] * B2 plot(girth1, girth2) plot(shape1, shape2) round(cor(cbind(girth1, girth2, shape1, shape2)), 5) # Otestujeme mnohorozmernu normalitu dat library(MVN) mvn(headsize, mvnTest = "mardia") mvn(headsize, mvnTest = "hz") mvn(headsize, mvnTest = "royston") # Mozeme s cistym svedomim pouzit Bartlettov test W <- det(diag(2) - H2) # =det(diag(2)-H1) Z<- -(25 - 0.5 * (2 + 2 + 3)) * log(W) 1 - pchisq(Z, df=4) ### Teraz uz len v rychlosti zavislost viacerych charakteristik IQ ### od viacerych fyzickych charakteristik cloveka # FSIQ, VIQ, PIQ ... vysledky trojice testov IQ # Weight, Height, MRI ... telesna hmotnost a vyska cloveka, MRI velkost mozgu merana pomocou MRI brain <- read.table("http://www.iam.fmph.uniba.sk/ospm/Harman/data/BRAIN.TXT", header=TRUE) print(brain) bf <- brain[brain$Gender=="Female",] bf <- bf[,3:7]; bf <- scale(bf) print(cancor(bf[,1:2], bf[,3:5])) mvn(bf, mvnTest = "mardia") S <- var(bf) S11 <- S[1:2, 1:2]; S12 <- S[1:2, 3:5] S21 <- S[3:5, 1:2]; S22 <- S[3:5, 3:5] W <- det(diag(3) - solve(S22)%*%S21%*%solve(S11)%*%S12) Z <- -(20 - (2+3+3)/2)*log(W) 1-pchisq(Z, df=6)