od.A.REX <- function (Fx, supp.ini, ver=1, gamma=1, eff=1-1e-9, it.max=Inf, t.max=30) 
{
    # Computes an A-optimal approximate design of experiments on a finite design space
    # Note that this function can be used also for computing I-optimal designs because
    # the I-optimal design problem can be directly converted to a problem of A-optimality
    # Based on the paper: https://arxiv.org/abs/1801.05661
    # Authors: Radoslav Harman, Lenka Filova, Peter Richtarik
    #
    # Arguments:
    # Fx ... n times m(<=n) matrix containing all possible regressors (as rows),
    #        that is, n is the number of design points, m(>=2) is the number of parameters
    # supp.ini ... support of the initial design (the ini. design is uniform on the
    #        support); it must be chosen such that its information matrix is non-singular
    # ver ... version of the algorithm (0 = without the nullity control)
    # gamma ... parameter regulating the size of the exchange batch
    # eff ... threshold on the minumim design efficiency to stop the computation
    # it.max ... maximum allowed number of iterations
    # t.max ... threshold for the maximum computation time
    #
    # Output is the list with components:
    # w.best ... the resulting approximate design
    # Phi.best ... the A-criterion value of w.best
    # eff.best ... a lower bound on the efficiency of w.best wrt the perfect D-optimal design
    # n.iter ... number of iterations performed
    
    # Example: An A-optimal design on 50000 random regressors in R^10
    # n <- 50000; m <- 10; F.rnd <- matrix(rnorm(n*m), ncol=m)
    # res <- od.A.REX(F.rnd, sample(1:n, m)); supp <- res$w.best>0
    # print(cbind((1:n)[supp], res$w.best[supp]))
    
    stepsize <- function (M, w, v)
    {
        # Computes the A-optimal stepsize alpha for the weight exchange:
        # w[v[1]] <- w[v[1]] - alpha; w[v[2]] <- w[v[2]] + alpha
        
        M.inv <- solve(M)
        dv <- Fx[v, ] %*% M.inv %*% t(Fx[v, ]) 
        av <- Fx[v, ] %*% M.inv %*% M.inv %*% t(Fx[v, ])  
        A <- av[2,2] - av[1,1]; C <- dv[2,2] - dv[1,1]; D <- dv[1,1] * dv[2,2] - dv[1,2]^2  
        B <- 2 * dv[1,2] * av[1,2] - dv[2,2] * av[1,1] - dv[1,1] * av[2,2]  
        G <- A * D + B * C; k <- v[1]; l <- v[2]
        
        if ((abs(G) < del.alpha) && (abs(B) > del.alpha)) {
            r <- -A / (2*B)  
            if ((-w[l] <= r) && (r <= w[k])) return(r)  
        }
        if (abs(G) > 0){  
            r <- -(B + sqrt(B^2 - A * G)) / G
            if ((abs(r) < del.alpha) && (B < 0)) {
                x <- A * G / B^2
                r <- -B * (x^3/16 + x^2/8 + x/2) / G
            }
            if ((-w[l] <= r) && (r <= w[k])) return(r) 
        }
        
        if (A > del.alpha) {
            return(w[k])
        } else if (A < -del.alpha) {
            return(-w[l])
        } else {
            return(0)
        }
    }
    
    start <- as.numeric(proc.time()[3]); del <- 1e-24; del.alpha <- 1e-14
    Fx <- as.matrix(Fx); n <- nrow(Fx); m <- ncol(Fx)
    eff.inv <- 1/eff; n.iter <- 0; L <- min(n, gamma*m)
    lx.vec <- rep(0, L); index <- 1:n; one <- rep(1, m)
    
    supp <- supp.ini; K <- length(supp); Fx.supp <- Fx[supp, ]
    w <- rep(0, n); w[supp] <- 1 / length(supp); w.supp <- w[supp]
    M <- crossprod(sqrt(w.supp) * Fx.supp)
    M.inv <- solve(M); G <- Fx %*% M.inv 
    a.fun <- (G^2) %*% one; ord <- order(a.fun, decreasing=TRUE)
    lx.vec <- sample(ord[1:L]); kx.vec <- sample(supp)
    
    while (TRUE) {

        n.iter <- n.iter + 1; ord1 <- which.min(a.fun[supp])
        kb <- supp[ord1]; lb <- ord[1]; v <- c(kb, lb)
        alpha <- stepsize(M, w, v)
        w[kb] <- w[kb] - alpha; w[lb] <- w[lb] + alpha
        M <- M + alpha * (tcrossprod(Fx[lb, ]) - tcrossprod(Fx[kb, ]))
        
        if((w[kb] < del) && (ver==1)) { # LBE is nullifying and the version is 1
            for(l in 1:L) {                          
                lx <- lx.vec[l]; Alx <- tcrossprod(Fx[lx, ])
                for (k in 1:K) {
                    kx <- kx.vec[k]; v <- c(kx, lx)
                    alpha <- stepsize(M, w, v)
                    wkx.temp <- w[kx] - alpha; wlx.temp <- w[lx] + alpha
                    if((wkx.temp < del) || (wlx.temp < del)) {
                        w[kx] <- wkx.temp; w[lx] <- wlx.temp
                        M <- M + alpha * (Alx - tcrossprod(Fx[kx, ]))                            
                    }
                }
            }
        } else { # LBE is non-nullifying or the version is 0
            for(l in 1:L) {                          
                lx <- lx.vec[l]; Alx <- tcrossprod(Fx[lx, ])
                for (k in 1:K) {
                    kx <- kx.vec[k]; v <- c(kx, lx)
                    alpha <- stepsize(M, w, v)
                    w[kx] <- w[kx] - alpha; w[lx] <- w[lx] + alpha
                    M <- M + alpha * (Alx - tcrossprod(Fx[kx, ]))                            
                }
            }         
        }
        
        supp <- index[w > del]; K <- length(supp); w.supp <- w[supp]
        M.inv <- solve(M); G <- Fx %*% M.inv
        a.fun <- (G^2) %*% one
        ord.ind <- (1:n)[a.fun >= -sort(-a.fun, partial=L)[L]]
        ord <- ord.ind[order(a.fun[ord.ind], decreasing=TRUE)]
        # The two lines above can be replaced by simpler but usually
        # somewhat slower ord <- order(a.fun, decreasing=TRUE)[1:L]
        lx.vec <- sample(ord); kx.vec <- sample(supp)
        
        tm <- as.numeric(proc.time()[3])
        eff.act <-  sum(diag(M.inv)) / a.fun[ord[1]]
        print(paste("Computation time:", round(tm-start, 2), "Lower bound on efficiency:", eff.act)) 
        if((1/eff.act < eff.inv) || (n.iter >= it.max) || (tm > start + t.max)) break
    }
    
    t.act <- round(as.numeric(proc.time()[3]) - start, 2)
    info <- paste("A-opt algoritm 'REX' finished after", t.act, "seconds at", Sys.time())
    info <- paste(info, "with", n.iter, "iterations."); print(info, quote = FALSE)
    Phi.best <- 1/sum(diag(M.inv)); eff.best <- sum(diag(M.inv))/a.fun[ord[1]]
    print(paste("A-criterion value:", Phi.best), quote = FALSE)
    print(paste("Efficiency at least:", eff.best), quote = FALSE)
    
    list(w.best=w, Phi.best=Phi.best, eff.best=eff.best, iter=n.iter, t.act=t.act)
}