Lineárna regresia

Uvažujme model \[ y_i = f^T(x_i)\theta + \varepsilon_i,\ i=1,...,N,\] kde

\(y_i\) sú pozorovania

\(f(x)\) je daný vektor regresných funkcií

\(\theta\) je vektor neznámych parametrov

\(\varepsilon_i\) sú nekorelované homoskedastické chyby pozorovaní.

Príklad 1: priamková regresia

Zadanie vektora regresných funkcií pre priamkovú regresiu \(y_i=\alpha_i+\beta_i x_i+\varepsilon_i\)

f <- function(x) c(1,x)

Regresná funkcia

eta <- function(x,theta) t(f(x))%*%theta

Konštrukcia matice plánu \[ F=\begin{bmatrix} 1 & \cdots & 1\\ x_1 & \cdots & x_N \end{bmatrix} \]

F <- function(X){
  N <- length(X)
  m <- length(f(X[1]))
  res <- matrix(rep(0,N*m), nrow=N, ncol=m)
  for(i in 1:N)  res[i,] <- f(X[i])
  res
}

Vygenerujeme dáta so skutočnou hodnotou parametra \(\theta\), ktorú pri reálnych aplikáciách nepoznáme

theta <- c(pi,0.2) 
sigma <- 4

X <- seq(from=0, to=10, by=0.05)
Y <- NULL
for(i in c(1:length(X))) Y[i] <- eta(X[i], theta)

epsilon <- rnorm(length(X), 0, sigma)
Y <- Y + epsilon

Vypočítame odhad metódou najmenších štvorcov

m <- length(f(X[1]))
N <- length(X)

M <- t(F(X))%*%F(X)
thetahat <- solve(M, t(F(X))%*%Y)
nu <- Y - F(X)%*%thetahat
s2 <- norm(nu,type="F")^2/(N - m)

thetahat

##           [,1]
## [1,] 2.1812721
## [2,] 0.3463895

s2

## [1] 15.14059

sum(nu)

## [1] 1.361133e-13

Vykreslenie

Ytrue <- Yhat <- NULL
osX <- seq(from = min(X), to = max(X), by = 0.05)
for(i in 1:length(osX)){
Ytrue[i] <- eta(osX[i],theta) # regresna priamka pri skutocnom theta
Yhat[i]  <- eta(osX[i],thetahat) # regresna priamka pri odhadnutom theta
}
plot(X, Y, pch=19,col="black", xlim=c(0,10), ylim=c(min(Y),max(Y)), xlab="X", ylab="Y")
lines(osX, Ytrue, col="red", lwd=3, lty="dashed")
lines(osX, Yhat, col="black", lwd=2)

Uvažujme teraz situáciu, že \(W\) nie je jednotková matica

library(MASS)
W <- diag(1:N)
#W <- matrix(rnorm(N^2), nrow=N)
#W <- 0.1 * W %*% t(W)
#v <- rnorm(N)
#W <- diag(N) + N * v %*% t(v)

epsilon2 <- mvrnorm(1, rep(0,N), W)
Y2 <- Y + epsilon2
mod2 <- lm(formula = Y2~F(X)-1) 
M2 <- t(F(X))%*% W %*% F(X)
thetahat2 <- solve(M2, t(F(X))%*%Y2)
thetahat

##           [,1]
## [1,] 2.1812721
## [2,] 0.3463895

thetahat2

##              [,1]
## [1,]  0.081955215
## [2,] -0.006924778

Yhat2 <- NULL
for(i in 1:length(osX)){
  Yhat2[i]  <- eta(osX[i], thetahat2) # regresna priamka pri odhadnutom theta
}

plot(X, Y2, pch=19,col="black", xlim=c(0,10), ylim=c(min(Y),max(Y)), xlab="X", ylab="Y")
lines(osX, Ytrue, col="red", lwd=3, lty="dashed")
lines(osX, Yhat2, col="blue", lwd=2)

Príklad 2

f <- function(x) c(1, x)
eta <- function(x, theta) t(f(x))%*%theta

theta <- c(pi,0.2, 1) 
sigma <- 2

X1 <- seq(from=0, to=10, by=0.25)
X2 <- seq(from=0, to=10, by=0.25)
X <- as.matrix(expand.grid(X1, X2))
F <- cbind(1, X)

Y <- NULL
for(i in 1:dim(X)[1]) Y[i] <- eta(X[i,], theta)

epsilon <- rnorm(dim(X)[1], 0, sigma)
Y <- Y + epsilon

m <- length(f(X[1,]))
N <- dim(X)[1]

mod<-lm(formula=Y~X) 

M <- t(F)%*%F
thetahat <- solve(M,t(F)%*%Y)
nu <- Y - F%*%thetahat
s2 <- norm(nu,type="F")^2/(N - m)

thetahat

##           [,1]
##      3.0462280
## Var1 0.1950815
## Var2 1.0231278

s2

## [1] 4.145054

sum(nu)

## [1] -2.125633e-12

library(rgl)
x <- F[,2]; y <- F[,3]; z <- Y
open3d()

## wgl 
##   1

rgl.spheres(x, y, z, r = 0.2, color = "#D95F02") 
aspect3d(1,1,1)
axes3d()
fit <- lm(z ~ x + y)
coefs <- coef(fit)
a <- coefs["x"]; b <- coefs["y"]; c <- -1
d <- coefs["(Intercept)"]
rgl.planes(a, b, c, d, alpha=0.5, color = "red")

You must enable Javascript to view this page properly.

Lineárna regresia

Lenka Filova

Príklad 1: priamková regresia

Príklad 2