clust.gen <- function(x, k, pm, N) { # Maximization of the likelihood for normal-model clustering (into 2 clusters) # Method: A standard binary genetic algorithm with uniform crossovers and point mutations # x ... 2 x n matrix, with columns corresponding to the 2D vectors of features of n objects # k ... population size (should be even) # pm ... point mutation probability # N ... number of generations clust.mmv <- function(gamma, x) { # negative log likelihood to be minimized # gamma ... parameter from {0,1}^n corresponding to the clustering n <- ncol(x) n1 <- sum(gamma) n0 <- n - n1 x0 <- x[, gamma==0] x1 <- x[, gamma==1] S0 <- cov(t(x0)) S1 <- cov(t(x1)) return(n0 * log(det(S0)) + n1 * log(det(S1))) } n <- ncol(x) # Create the initial population Gamma.mut <- Gamma.tmp <- Gamma <- matrix(sample(c(0, 1), k*n, replace=TRUE), ncol=k) vals <- rep(0, k) for(i in 1:N) { # Evaluate the population for(j in 1:k) { vals[j] <- clust.mmv(Gamma[,j],x) } # Order the population by fitness o <- order(vals) Gamma <- Gamma[, o] # Plot the best individual (i.e. the best clustering) plot(t(x), col=Gamma[, 1] + 1, pch=19, main=round(abs(min(vals)), 3), xlab="x", ylab="y", asp=1) # Create a temporary generation by uniform crossovers of the best individuals for(r in 1:k) { mum.i <- sample(1:k, 1, prob=(k:1)^2) dad.i <- sample(1:k, 1, prob=(k:1)^2) cross.i <- sample(c(0, 1), n, replace=TRUE) Gamma.tmp[, r] <- cross.i * Gamma[, dad.i] + (1-cross.i) * Gamma[, mum.i] } Gamma <- Gamma.tmp # Create the new generation by mutating individuals in the temporary generation for(r in 1:k) { mut <- sample(c(0, 1), n, prob=c(1 - pm, pm), replace=TRUE) Gamma.mut[, r] <- (1 - mut) * Gamma[, r] + mut * (1 - Gamma[, r]) } Gamma <- Gamma.mut } return(Gamma[, 1]) } # Demo x1 <- rnorm(200, mean=-3, sd=1) y1 <- rnorm(200, mean=0, sd=1) x2 <- rnorm(200, mean=3, sd=1) y2 <- rnorm(200, mean=0, sd=1) clust.gen(rbind(c(x1, x2), c(y1, y2)), 20, 0.001, 2000) x1 <- rnorm(200, mean=0, sd=5) y1 <- rnorm(200, mean=0, sd=1) x2 <- rnorm(200, mean=0, sd=1) y2 <- rnorm(200, mean=0, sd=5) clust.gen(rbind(c(x1, x2), c(y1, y2)), 20, 0.001, 2000) x1 <- rnorm(200, mean=0, sd=5) y1 <- rnorm(200, mean=0, sd=5) x2 <- rnorm(200, mean=5, sd=0.5) y2 <- rnorm(200, mean=5, sd=0.5) clust.gen(rbind(c(x1, x2), c(y1, y2)), 20, 0.001, 2000)