od.REX.D <- function (F, supp.ini, ver=1, gamma=4, eff=1-1e-9, t.max=30) 
{
    # Computes a D-optimal approximate design of experiments on a finite design space
    # F ... an n times m(<=n) matrix containing all possible regressors (as rows),
    #       that is, n is the number of design points, m(>=2) is the number of parameters
    # supp.ini ... the support of the initial design (the initial design is uniform on the support)
    #       it must be chosen such that its information matrix is non-singular
    # ver ... the version of the algorithm (0 = without the nullity control)
    # gamma ... a parameter regulating the size of the exchange batch
    # eff ... a threshold on the minumim design efficiency to stop the computation
    # t.max ... a threshold for the maximum computation time
    #
    # Based on the paper: https://arxiv.org/abs/1801.05661
    # Authors: Harman, Filova, Richtarik
    
    # Example: A D-optimal design on 100000 random regressors in R^10
    # n <- 100000; m <- 10; F.rnd <- matrix(rnorm(n*m), ncol=m)
    # res <- od.REX.D(F.rnd, sample(1:n, m)); supp <- res$w.best>0
    # print(cbind((1:n)[supp], res$w.best[supp]))
    
    start <- as.numeric(proc.time()[3]); del <- 1e-14; eps <- 1e-24
    F <- as.matrix(F); n <- dim(F)[1]; m <- dim(F)[2]
    prof <- matrix(0, ncol=2, nrow=0)
    
    eff.inv <- 1/eff; n.iter <- 0; L <- min(n, gamma*m)
    lx.vec <- rep(0, L); index <- 1:n; one <- t(rep(1, m))
    
    supp <- supp.ini; K <- m; F.supp <- F[supp, ]
    w <- rep(0, n); w[supp] <- 1 / length(supp); w.supp <- w[supp]
    M <- t((w.supp %*% one) * F.supp) %*% F.supp
    d.fun <- apply((F %*% solve(M)) * F, 1, sum) / m 
    ord <- order(d.fun, decreasing=TRUE)
    lx.vec <- sample(ord[1:L]); kx.vec <- sample(supp)
    
    while (TRUE) {
        prof <- rbind(prof, c(as.numeric(proc.time()[3])-start, det(M)^(1/m)))
        
        n.iter <- n.iter + 1; ord1 <- which.min(d.fun[supp])
        kb <- supp[ord1]; lb <- ord[1]; v <- c(kb, lb)
        cv <- F[v, ] %*% solve(M, t(F[v, ]))
        alpha <- 0.5 * (cv[2, 2] - cv[1, 1])/(cv[1, 1] * cv[2, 2] - cv[1, 2]^2 + del)
        alpha <- min(w[kb], alpha)
        w[kb] <- w[kb] - alpha; w[lb] <- w[lb] + alpha
        M <- M + alpha * (F[lb, ] %*% t(F[lb, ]) - F[kb, ] %*% t(F[kb, ]))
        
        if((w[kb] < del) && (ver==1)) { # LBE is nullifying and the version is 1
            for(l in 1:L) {                          
                lx <- lx.vec[l]; Alx <- F[lx, ] %*% t(F[lx, ])
                for (k in 1:K) {
                    kx <- kx.vec[k]; v <- c(kx, lx)
                    cv <- F[v, ] %*% solve(M, t(F[v, ]))
                    alpha <- 0.5 * (cv[2, 2] - cv[1, 1])/(cv[1, 1] * cv[2, 2] - cv[1, 2]^2 + eps)
                    alpha <- min(w[kx], max(-w[lx], alpha))
                    wkx.temp <- w[kx] - alpha; wlx.temp <- w[lx] + alpha
                    if((wkx.temp < del) || (wlx.temp < del)) {
                        w[kx] <- wkx.temp; w[lx] <- wlx.temp
                        M <- M + alpha * (Alx - F[kx, ] %*% t(F[kx, ]))                            
                    }
                }
            }
        } else { # LBE is non-nullifying or the version is 0
            for(l in 1:L) {                          
                lx <- lx.vec[l]; Alx <- F[lx, ] %*% t(F[lx, ])
                for (k in 1:K) {
                    kx <- kx.vec[k]; v <- c(kx, lx)
                    cv <- F[v, ] %*% solve(M, t(F[v, ]))
                    alpha <- 0.5 * (cv[2, 2] - cv[1, 1])/(cv[1, 1] * cv[2, 2] - cv[1, 2]^2 + del)
                    alpha <- min(w[kx], max(-w[lx], alpha))
                    w[kx] <- w[kx] - alpha; w[lx] <- w[lx] + alpha
                    M <- M + alpha * (Alx - F[kx, ] %*% t(F[kx, ]))                            
                }
            }         
        }
        
        supp <- index[w > del]; K <- length(supp); w.supp <- w[supp]
        d.fun <- apply((F %*% solve(M)) * F, 1, sum) / m
        ord <- order(d.fun, decreasing=TRUE)
        lx.vec <- sample(ord[1:L]); kx.vec <- sample(supp)

        tm <- as.numeric(proc.time()[3])
        eff.act <-  1 / d.fun[ord[1]]
        print(paste("Computation time:", round(tm-start, 2), "Lower bound on efficiency:", eff.act)) 
        
        if ((d.fun[ord[1]] < eff.inv) || (tm > start + t.max)) break
    }
    
    t.act <- round(as.numeric(proc.time()[3]) - start, 2)
    info <- paste("D-opt algoritm 'REX' finished after", t.act, "seconds at", Sys.time())
    info <- paste(info, "with", n.iter, "iterations.")
    Phi.best <- det(M)^(1/m); eff.best <- 1/d.fun[ord[1]]
    print(info, quote = FALSE); print(paste("D-criterion value:", Phi.best), quote = FALSE)
    print(paste("Efficiency at least:", eff.best), quote = FALSE)
    
    list(w.best=w, Phi.best=Phi.best, eff.best=eff.best, iter=n.iter, t.act=t.act, prof=prof)
}