od.REX.A <- function (F, supp.ini, ver=1, gamma=1, eff=1-1e-9, t.max=30) { # Computes an A-optimal approximate design of experiments on a finite design space # Note that this procedure can be easily used also for computing I-optimal designs # because I-optimal design problem can be directly converted to a problem of A-optimality # see, e.g., https://arxiv.org/abs/1307.4953 Appendix A.3 # # F ... an n times m(<=n) matrix containing all possible regressors (as rows), # that is, n is the number of design points, m(>=2) is the number of parameters # supp.ini ... the support of the initial design (the initial design is uniform on the support) # it must be chosen such that its information matrix is non-singular # ver ... the version of the algorithm (0 = without the nullity control) # gamma ... a parameter regulating the size of the exchange batch # eff ... a threshold on the minumim design efficiency to stop the computation # t.max ... a threshold for the maximum computation time # # Based on the paper: https://arxiv.org/abs/1801.05661 # Authors: Harman, Filova, Richtarik # Example: An A-optimal design on 50000 random regressors in R^10 # n <- 50000; m <- 10; F.rnd <- matrix(rnorm(n*m), ncol=m) # res <- od.REX.A(F.rnd, sample(1:n, m)); supp <- res$w.best>0 # print(cbind((1:n)[supp], res$w.best[supp])) stepsize <- function (M, w, v) { # Computes the A-optimal stepsize alpha for the weight exchange: # w[v[1]] <- w[v[1]] - alpha; w[v[2]] <- w[v[2]] + alpha M.inv <- solve(M) dv <- F[v, ] %*% M.inv %*% t(F[v, ]) av <- F[v, ] %*% M.inv %*% M.inv %*% t(F[v, ]) A <- av[2,2] - av[1,1]; C <- dv[2,2] - dv[1,1]; D <- dv[1,1] * dv[2,2] - dv[1,2]^2 B <- 2 * dv[1,2] * av[1,2] - dv[2,2] * av[1,1] - dv[1,1] * av[2,2] G <- A * D + B * C; k <- v[1]; l <- v[2] if ((abs(G) < del.alpha) && (abs(B) > del.alpha)) { r <- -A / (2*B) if ((-w[l] <= r) && (r <= w[k])) return(r) } if (abs(G) > 0){ r <- -(B + sqrt(B^2 - A * G)) / G if ((abs(r) < del.alpha) && (B < 0)) { x <- A * G / B^2 r <- -B * (x^3/16 + x^2/8 + x/2) / G } if ((-w[l] <= r) && (r <= w[k])) return(r) } if (A > del.alpha) { return(w[k]) } else if (A < -del.alpha) { return(-w[l]) } else { return(0) } } start <- as.numeric(proc.time()[3]); del <- 1e-24; del.alpha <- 1e-14 F <- as.matrix(F); n <- dim(F)[1]; m <- dim(F)[2] prof <- matrix(0, ncol=2, nrow=0) eff.inv <- 1/eff; n.iter <- 0; L <- min(n, gamma*m) lx.vec <- rep(0, L); index <- 1:n; one <- t(rep(1, m)) supp <- supp.ini; K <- m; F.supp <- F[supp, ] w <- rep(0, n); w[supp] <- 1 / length(supp); w.supp <- w[supp] M <- t((w.supp %*% one) * F.supp) %*% F.supp M.inv <- solve(M); G <- F %*% M.inv a.fun <- apply(G * G, 1, sum) ord <- order(a.fun, decreasing=TRUE) lx.vec <- sample(ord[1:L]); kx.vec <- sample(supp) while (TRUE) { prof <- rbind(prof, c(as.numeric(proc.time()[3])-start, 1/sum(diag(M.inv)))) n.iter <- n.iter + 1; ord1 <- which.min(a.fun[supp]) kb <- supp[ord1]; lb <- ord[1]; v <- c(kb, lb) alpha <- stepsize(M, w, v) w[kb] <- w[kb] - alpha; w[lb] <- w[lb] + alpha M <- M + alpha * (F[lb, ] %*% t(F[lb, ]) - F[kb, ] %*% t(F[kb, ])) if((w[kb] < del) && (ver==1)) { # LBE is nullifying and the version is 1 for(l in 1:L) { lx <- lx.vec[l]; Alx <- F[lx, ] %*% t(F[lx, ]) for (k in 1:K) { kx <- kx.vec[k]; v <- c(kx, lx) alpha <- stepsize(M, w, v) wkx.temp <- w[kx] - alpha; wlx.temp <- w[lx] + alpha if((wkx.temp < del) || (wlx.temp < del)) { w[kx] <- wkx.temp; w[lx] <- wlx.temp M <- M + alpha * (Alx - F[kx, ] %*% t(F[kx, ])) } } } } else { # LBE is non-nullifying or the version is 0 for(l in 1:L) { lx <- lx.vec[l]; Alx <- F[lx, ] %*% t(F[lx, ]) for (k in 1:K) { kx <- kx.vec[k]; v <- c(kx, lx) alpha <- stepsize(M, w, v) w[kx] <- w[kx] - alpha; w[lx] <- w[lx] + alpha M <- M + alpha * (Alx - F[kx, ] %*% t(F[kx, ])) } } } supp <- index[w > del]; K <- length(supp); w.supp <- w[supp] M.inv <- solve(M); G <- F %*% M.inv a.fun <- apply(G * G, 1, sum) ord <- order(a.fun, decreasing=TRUE) lx.vec <- sample(ord[1:L]); kx.vec <- sample(supp) tm <- as.numeric(proc.time()[3]) eff.act <- sum(diag(M.inv)) / a.fun[ord[1]] print(paste("Computation time:", round(tm-start, 2), "Lower bound on efficiency:", eff.act)) if((1/eff.act < eff.inv) || (tm > start + t.max)) break } t.act <- round(as.numeric(proc.time()[3]) - start, 2) info <- paste("A-opt algoritm 'REX' finished after", t.act, "seconds at", Sys.time()) info <- paste(info, "with", n.iter, "iterations.") Phi.best <- 1/sum(diag(M.inv)); eff.best <- sum(diag(M.inv))/a.fun[ord[1]] print(info, quote = FALSE); print(paste("A-criterion value:", Phi.best), quote = FALSE) print(paste("Efficiency at least:", eff.best), quote = FALSE) list(w.best=w, Phi.best=Phi.best, eff.best=eff.best, iter=n.iter, t.act=t.act, prof=prof) }