od.D.REX <- function (Fx, supp.ini, ver=1, gamma=4, eff=1-1e-9, it.max=Inf, t.max=30) {
  # Computes a D-optimal approximate design of experiments on a finite design space
  # Can also be used to compute the minimum-volume data-enclosing ellipsoid
  # Based on the paper: https://arxiv.org/abs/1801.05661
  # Authors: Radoslav Harman, Lenka Filova, Peter Richtarik
  #
  # Arguments:
  # Fx ... n times m(<=n) matrix containing all possible regressors (as rows),
  #   that is, n is the number of design points, m(>=2) is the number of parameters
  # supp.ini ... support of the initial design (the ini. design is uniform on the
  #   support); it must be chosen such that its information matrix is non-singular
  # ver ... version of the algorithm (0 = without the nullity control)
  # gamma ... parameter regulating the size of the exchange batch; the batch
  #   size is L = min(n, gamma*m), truncated to an integer and at least 1
  # eff ... threshold on the minimum design efficiency to stop the computation
  # it.max ... maximum allowed number of iterations
  # t.max ... threshold for the maximum computation time
  #   (compared against elapsed proc.time(), i.e. seconds)
  #
  # Output is the list with components:
  # w.best ... the resulting approximate design
  # Phi.best ... the D-criterion value of w.best
  # eff.best ... a lower bound on the efficiency of w.best wrt the perfect D-optimal design
  # n.iter ... number of iterations performed
  # t.act ... the actual time of computation
  #
  # Side effects: prints one progress line per iteration and a short summary
  # at the end; uses the random number generator (set.seed for reproducibility).
  #
  # Example: A D-optimal design on 100000 random regressors in R^10
  # n <- 100000; m <- 10; F.rnd <- matrix(rnorm(n*m), ncol=m)
  # res <- od.D.REX(F.rnd, sample(1:n, m)); supp <- res$w.best>0
  # print(cbind((1:n)[supp], res$w.best[supp]))

  start <- as.numeric(proc.time()[3])
  del <- 1e-14  # weights below del count as zero; also regularises the step denominator
  eps <- 1e-24  # smaller regulariser used in the nullifying exchange pass

  Fx <- as.matrix(Fx); n <- nrow(Fx); m <- ncol(Fx)
  eff.inv <- 1 / eff; n.iter <- 0
  # Batch size must be a whole number >= 1 so that indexing, seq_len(L) and
  # sort(partial = L) all agree.
  L <- max(1L, as.integer(min(n, gamma * m)))
  one <- rep(1, m)

  # Random permutation of x. Unlike sample(x), a length-one numeric x is NOT
  # expanded to a permutation of 1:x; for length(x) > 1 the draw is identical
  # to sample(x).
  shuffle <- function(x) x[sample.int(length(x))]

  # d(x) = f(x)' M^{-1} f(x) / m for every row f(x) of Fx, as a plain vector.
  variances <- function(M) {
    as.vector(((Fx %*% t(chol(solve(M))))^2) %*% one / m)
  }

  # Uniform initial design on supp.ini and its information matrix.
  supp <- supp.ini
  w <- rep(0, n); w[supp] <- 1 / length(supp)
  M <- crossprod(sqrt(w[supp]) * Fx[supp, , drop = FALSE])
  d.fun <- variances(M)
  ord <- order(d.fun, decreasing = TRUE)
  lx.vec <- shuffle(ord[seq_len(L)]); kx.vec <- shuffle(supp)

  while (TRUE) {
    n.iter <- n.iter + 1

    # LBE step: exchange weight between the support point with the smallest
    # d.fun (kb) and the point with the largest d.fun overall (lb).
    kb <- supp[which.min(d.fun[supp])]; lb <- ord[1]
    Fv <- Fx[c(kb, lb), , drop = FALSE]
    cv <- Fv %*% solve(M, t(Fv))
    alpha <- 0.5 * (cv[2, 2] - cv[1, 1]) / (cv[1, 1] * cv[2, 2] - cv[1, 2]^2 + del)
    alpha <- min(w[kb], alpha)
    w[kb] <- w[kb] - alpha; w[lb] <- w[lb] + alpha
    M <- M + alpha * (tcrossprod(Fx[lb, ]) - tcrossprod(Fx[kb, ]))

    # Exchange pass over all pairs (kx, lx). If the LBE was nullifying and
    # ver == 1, only exchanges that drive one of the two weights below del
    # are committed (and eps replaces del as the regulariser); otherwise every
    # exchange is committed.
    nullify <- (w[kb] < del) && (ver == 1)
    reg <- if (nullify) eps else del
    for (lx in lx.vec[seq_len(L)]) {
      Alx <- tcrossprod(Fx[lx, ])
      for (kx in kx.vec) {
        Fv <- Fx[c(kx, lx), , drop = FALSE]
        cv <- Fv %*% solve(M, t(Fv))
        alpha <- 0.5 * (cv[2, 2] - cv[1, 1]) / (cv[1, 1] * cv[2, 2] - cv[1, 2]^2 + reg)
        # Keep both weights non-negative.
        alpha <- min(w[kx], max(-w[lx], alpha))
        wkx.new <- w[kx] - alpha; wlx.new <- w[lx] + alpha
        if (!nullify || (wkx.new < del) || (wlx.new < del)) {
          w[kx] <- wkx.new; w[lx] <- wlx.new
          M <- M + alpha * (Alx - tcrossprod(Fx[kx, ]))
        }
      }
    }

    # Refresh the support, the variance function and the candidate batch.
    supp <- which(w > del)
    d.fun <- variances(M)
    # Partial sort: pick the points whose d.fun is at least the L-th largest
    # value, then order only those. This can be replaced by the simpler but
    # usually somewhat slower ord <- order(d.fun, decreasing=TRUE)[1:L]
    thr <- -sort(-d.fun, partial = L)[L]
    ord.ind <- which(d.fun >= thr)
    ord <- ord.ind[order(d.fun[ord.ind], decreasing = TRUE)]
    lx.vec <- shuffle(ord); kx.vec <- shuffle(supp)

    tm <- as.numeric(proc.time()[3])
    eff.act <- 1 / d.fun[ord[1]]
    print(paste("Computation time:", round(tm - start, 2), "Lower bound on efficiency:", eff.act))
    # Stop on reaching the efficiency threshold, the iteration cap or the time cap.
    if ((d.fun[ord[1]] < eff.inv) || (n.iter >= it.max) || (tm > start + t.max)) break
  }

  t.act <- round(as.numeric(proc.time()[3]) - start, 2)
  info <- paste("D-opt algoritm 'REX' finished after", t.act, "seconds at", Sys.time())
  info <- paste(info, "with", n.iter, "iterations."); print(info, quote = FALSE)
  Phi.best <- det(M)^(1 / m); eff.best <- 1 / d.fun[ord[1]]
  print(paste("D-criterion value:", Phi.best), quote = FALSE)
  print(paste("Efficiency at least:", eff.best), quote = FALSE)

  list(w.best = w, Phi.best = Phi.best, eff.best = eff.best, n.iter = n.iter, t.act = t.act)
}