# nacitanie dat boston <- read.table("http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/bostonh.dat") # popis dat # V1: miera kriminality # V2: podiel velkych pozemkov # V3: podiel velkoobchodnych priestorov # V4: Rieka Charles # V5: koncentracia oxidu dusiteho # V6: priemerny pocet izieb v obydli # V7: podiel zastavby starsej ako 1940 # V8: vazena vzdialenost od centier zamestnanosti # V9: index pristupnosti k dialniciam # V10: miera zdanenia # V11: pocet ucitelov na ziaka # V12: index afroamerickeho obyvatelstva # V13: percento populacie s nizkym prijmom # V14: medianova hodnota bytov v tisickach $ head(boston) # zakladny scatterplot plot(boston) # neprehladne, vsetky premenne naraz # vezmime si len X1-X5 a X14 plot(boston[,-c(5:13)]) #------------------------------------------------------ #boxplot boxplot(boston) #standardizacia z <- boston for(i in 1:14) z[, i] <- (boston[, i] - mean(boston[, i]))/(sqrt(var(boston[, i]))) boxplot(boston) #------------------------------------------------------- #transformacia dat xt <- boston xt[, 1] <- log(boston[, 1]) xt[, 2] <- boston[, 2]/10 xt[, 3] <- log(boston[, 3]) xt[, 5] <- log(boston[, 5]) xt[, 6] <- log(boston[, 6]) xt[, 7] <- ((boston[, 7]^2.5))/10000 xt[, 8] <- log(boston[, 8]) xt[, 9] <- log(boston[, 9]) xt[, 10] <- log(boston[, 10]) xt[, 11] <- exp(0.4 * boston[, 11])/1000 xt[, 12] <- boston[, 12]/100 xt[, 13] <- sqrt(boston[, 13]) xt[, 14] <- log(boston[, 14]) tablex = rbind(t(apply(boston, 2, mean)), t(apply(boston, 2, median)), t((apply(boston, 2, sd))^2), t(apply(boston, 2, sd))) print(" Mean Median Variance SE") t(round(tablex, 4)) #--------------------------------------------------------- # kovariancna a korelacna matica print(cov(boston)) print(cor(boston)) library(corrplot) corrplot.mixed(cor(boston), lower = "number", lower.col = "black", upper = "ellipse", tl.pos="lt") tablext = cbind(apply(xt, 2, mean), apply(xt, 2, median), (apply(xt, 2, sd))^2, apply(xt, 2, sd)) print(" Mean Median Variance SE") round(tablext, 4) print(cov(xt)) print(cor(xt)) # boxplot pre transformovane data zt <- xt for(i in 1:14) zt[, i] <- (xt[, i] - mean(xt[, i]))/(sqrt(var(xt[, i]))) par(mfrow=c(2,1)) boxplot(z) boxplot(zt) #contour plot dvoj(troj-)rozmernej zdruzenej hustoty library(KernSmooth) #kniznica pre jadrove odhady hustoty d <- bkde2D(boston[, 5:6], bandwidth = 1.06*c(sd(boston[, 5]), sd(boston[, 6]))* 200^(-1/5)) contour(d$x1, d$x2, d$fhat, col = c("blue", "black", "yellow", "cyan", "red", "magenta", "green", "blue","black"), lwd=3, cex.axis = 1) library(misc3d) d <- kde3d(boston[, 5], boston[, 6], boston[, 7], n = 15) contour3d(d$d, level = c(max(d$d[10,10,])*.02, max(d$d[10,10,])*.5, max(d$d[10,10,])*1.3), fill = c(FALSE, FALSE, TRUE), col.mesh = c("darkgreen","red","darkblue") , engine = "standard", screen=list(z=210,x=-40,y=-295), scale=TRUE) #------------------------------------------------------- # krajsia grafika, napriklad pomocou plot3d # dalsie uzitocne baliky: rgl, ggplot2 install.packages("plot3D") library(plot3D) x <- boston$V1 y <- boston$V13 z <- boston$V14 scatter3D(x, y, z, bty = "g", colkey = FALSE, main ="bty= 'g'") scatter3D(x, y, z, bty = "g", pch = 18, col = gg.col(100)) scatter3D(x, y, z, phi = 0, bty = "g", type = "l", ticktype = "detailed", lwd = 4) scatter3D(x, y, z, phi = 0, bty = "g", type = "h", ticktype = "detailed", pch = 19, cex = 0.5) CI <- list(z = matrix(nrow = length(x), data = cbind(x-0.1*rnorm(length(x)), x+0.1*rnorm(length(x))))) head(CI$z) scatter3D(x, y, z, phi = 0, bty = "g", col = gg.col(100), pch = 18, CI = CI) ##### library(rgl) plot3d(x,y,z,type="s",col=rainbow(n=10),size=1,lwd=5,box=F) M <- par3d("userMatrix") st<-function(n,t){ for (i in 1:n){ play3d( par3dinterp( userMatrix=list(M,rotate3d(M, pi, 1, 0, 0) ) ), duration=t ) play3d( par3dinterp( userMatrix=list(M,rotate3d(M, pi, 0, 1, 1) ) ), duration=t) play3d( par3dinterp( userMatrix=list(M,rotate3d(M, pi, 0, 1, 0) ) ), duration=t) play3d( par3dinterp( userMatrix=list(M,rotate3d(M, pi, 1, 0, 1) ) ), duration=t) } } st(20,2) myHist <- function(x, ...) { par(new=TRUE); hist(x, ..., main="") } b1 <- as.data.frame(cbind(x,y,z)) pairs(b1, diag.panel=myHist, main="Scatter plot matrix", pch=16) #-------------------------------------------------------- # PCP: parallel coordinate plot # vsetky premenne sa najskor naskaluju medzi 0 a 1 # hor. os: suradnice j (j=1,...,p) # vert.os: hodnota skalovanej premennej xij # pouzitie: # 1. korelacia: ak su ciary takmer rovnobezne--> kladna lin.zavislost (MVApcp2) # ak sa pretinaju cca v strede: negativna lin.zavislost (MVApcp3) # 2. detekcia podskupin: ciary konvergujuce k roznym bodom (MVApcp4) library(MASS) z <- boston[, 14] z[boston[, 14] <= median(boston[, 14])] <- 1 z[boston[, 14] > median(boston[, 14])] <- 2 parcoord(boston[, seq(1, 14, 1)], lty = z, lwd = 0.7, col = z, main = "PCP for Boston Housing", frame = TRUE) axis(side = 2, at = seq(0, 1, 0.2), labels = seq(0, 1, 0.2)) # tu je farba podla toho, ci je hodnota pod alebo nad medianom #----------------------------------------------------- #Chernoff-Flury faces :) library(aplpack) xx <- boston[83:118,] ncolors <- 30 faces(xx, nrow = 6,face.type=1,scale=TRUE, col.nose = rainbow(ncolors), col.eyes = rainbow(ncolors, start = 0.6, end = 0.85), col.hair = terrain.colors(ncolors), col.face = heat.colors(ncolors), col.lips = rainbow(ncolors, start = 0, end = 1), col.ears = rainbow(ncolors, start = 0, end = 0.8), plot.faces = TRUE)