# Course: "Cluster analysis and data classification"
# Study programme: "Probability and mathematical statistics"
# Lecturer: Radoslav Harman, KAMS FMFI UK Bratislava

# Cluster analysis 3: hierarchical clustering

# Specialised procedures for hierarchical clustering (HC) are provided by the
# cluster package.
library(cluster)

# Data ----
# Extrasolar planets data (details about the data were given last time).
planets <- read.csv("http://www.iam.fmph.uniba.sk/ospm/Harman/data/planets.csv")

# Keep only the planets with an orbital period (pl_orbper) below 600.
planets <- planets[planets$pl_orbper < 600, ]
n <- nrow(planets)

# Transform the variables: log for columns 4, 5 and 8, cube root for column 7,
# then centre and scale columns 4 to 8.
log_cols <- c(4, 5, 8)
planets[log_cols] <- lapply(planets[log_cols], log)
planets[, 7] <- (planets[, 7])^(1/3)
planets[, 4:8] <- scale(planets[, 4:8])

# Agglomerative clustering ----
help(agnes)

# The data are standardised before HC is applied (stand = TRUE) and the
# "average" method is used to measure the distance between clusters.
plan.agnes1 <- agnes(planets[, 4:8], stand = TRUE, method = "average")
summary(plan.agnes1)

# The merge component gives a complete representation of the dendrogram:
#   a negative number -k means that the single object k is being merged,
#   a positive number k means that a whole cluster of objects, formed in
#   row k of the matrix, is being merged.
plan.agnes1$merge

# Draw the dendrogram.
plot(plan.agnes1, which.plots = 2, hang = -1)
# The help topic is the S3 method plot.agnes, not the fitted object's name.
help(plot.agnes)

# Compare the result with other methods of measuring the distance between
# clusters.
plan.agnes2 <- agnes(planets[, 4:8], stand = TRUE, method = "single")
plot(plan.agnes2, which.plots = 2, hang = -1)

plan.agnes3 <- agnes(planets[, 4:8], stand = TRUE, method = "complete")
plot(plan.agnes3, which.plots = 2, hang = -1)

# Divisive clustering ----
help(diana)

# The data are standardised before HC is applied. Divisive clustering does not
# measure distances between clusters, so there is no method argument.
plan.diana <- diana(planets[, 4:8], stand = TRUE)
summary(plan.diana)
plot(plan.diana, which.plots = 2, hang = -1)