library(misc3d)
## Warning: package 'misc3d' was built under R version 3.6.1
library(rgl)
Vyrobíme si funkciu na výpočet hustoty dvojrozmerného normálneho rozdelenia:
hustota<-function(a=0,b=0,s1,s2,rho){
x <- seq(-2,2, length= 40); y <- x
f<-function (x,y){
xoy = ((x-a)^2/s1^2 - 2 * rho * (x-a) * (y-b)/(s1*s2) + (y-b)^2/s2^2)/(2 * (1 - rho^2))
density = exp(-xoy)/(2 * pi * s1*s2*sqrt(1 - rho^2))
density
}
x<-y<-seq(-4,4,length=100)
z<-outer(x,y,function(x,y)f(x,y))
return(list(x=x,y=y,z=z))
}
\(\mu=(0,0)'\), \(\Sigma=I_2\)
\(\mu=(0,0)'\), \(\Sigma=\begin{bmatrix}4&0\\0&1\end{bmatrix}\)
\(\mu=(0,0)'\), \(\Sigma=\begin{bmatrix}1&0.6\\0.6&1\end{bmatrix}\)
\(\mu=(0,0)'\), \(\Sigma=\begin{bmatrix}4&2.4\\2.4&1\end{bmatrix}\)
mu <- c(0,1)
Sigma <- matrix(c(1,0.5,0.5,1),2,2)
n <- 3000
library(MASS)
biv <- mvrnorm(n,mu,Sigma)
Sbiv <- cov(biv) #vyberova kovariancna matica
mu <- colMeans(biv) #MMV pre priemer
Sigma <- (n-1)*Sbiv/n #MMV pre S
plot(x=biv[,1], y=biv[,2], pch=16, col=heat.colors(50))
Vezmime si dvojrozmerné normálne rozdelenie a vykreslime marginálne hustoty a podmienenú hustotu \(X_2|X_1=0.5\).
library(mvtnorm)
Sigma <- matrix(c(1,-0.7,-0.7,1), nrow=2)
x <- seq(-3,3,0.01)
contour(x,x,outer(x,x,function(x,y){dmvnorm(cbind(x,y),sigma=Sigma)}))
abline(v=.5, lwd=2, col = "blue")
text(0.65, 2, labels=expression(x[1]==0.5), col = "blue", pos = 4)
# podmienene rozdelenie X2 | X1 = 0.5
y <- dnorm(x, mean = -0.7 * 0.5, sd = sqrt(1 - 0.7^2))
lines(y-abs(min(x)),x,lwd=2, col = "blue")
# marginalne rozdelenia
m1 <- m2 <- dnorm(x, 0, 1)
lines(x, m1 - abs(min(x)))
lines(m2 - abs(min(x)), x)
contour(x,x,outer(x,x,function(x,y){dmvnorm(cbind(x,y),sigma=Sigma)}))
cond <- function(x, cx) dnorm(x, mean = -0.7 * cx, sd = sqrt(1 - 0.7^2))
for (i in seq(-2, 2, by = 0.25)){
col <- colors()[grep("blue",colors())][2*i + 7]
lines(cond(x, i) + i, x, lwd = 2, col = col)
abline(v = i, lwd=1, col = col)
}
Uvažujme štvorrozmerné normálne rozdelenie s parametrami
\(\mu=(2,-1,3,1)'\), \(\Sigma=\begin{bmatrix} 7&3&-3&2\\3&6&0&4\\-3&0&5&-2\\2&4&-2&4\end{bmatrix}\).
Nájdeme podmienené rozdelenie \((X_1, X_2)|((X_3,X_4)=(1,1))\).
mean <- c(2, -1, 3, 1)
sigma <- rbind(c(7, 3, -3, 2), c(3, 6, 0, 4), c(-3, 0, 5, -2), c(2, 4, -2, 4))
condMVN: stredná hodnota a kov.matica podmieneného rozdelenia Y|X, kde Z=(X,Y)
dcmvnorm: hustota podmieneného rozdelenia
pcmvnorm: distr. funkcia podmieneného rozdelenia
rcmvnorm: generovanie z podmieneného rozdelenia
library(condMVNorm)
condMVN(mean, sigma, dependent.ind=c(1,2), given.ind=c(3,4), X.given=c(1,1))
## $condMean
## [1] 3 -2
##
## $condVar
## [,1] [,2]
## [1,] 5 2
## [2,] 2 1
dcmvnorm(x=c(3,4), mean, sigma, dependent.ind=c(1,2), given.ind=c(3,4), X.given=c(1,1))
## [1] 1.304118e-40
rcmvnorm(10,mean, sigma, dependent.ind=c(1,2), given.ind=c(3,4), X.given=c(1,1))
## [,1] [,2]
## [1,] 4.381039 -1.0797974
## [2,] 3.382739 -2.4800558
## [3,] 2.760072 -0.7427552
## [4,] 3.254314 -1.6442852
## [5,] 2.874583 -2.6112883
## [6,] 1.839835 -2.8663011
## [7,] 3.794057 -1.7614406
## [8,] 0.133222 -3.6394165
## [9,] 6.542526 -0.1653319
## [10,] 2.180010 -1.1634994
Uvažujme dáta Boston housing z 1. cvičenia.
boston <- read.table("http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/bostonh.dat")
attach(boston)
Vezmime len premenné V6, V13 a V14, teda počet izieb v byte/dome, percento populácie s nízkym príjmom a cenu nehnuteľnosti a predpokladajme, že vektor \((V_6, V_{13}, V_{14})'\) má normálne rozdelenie. Aké bude rozdelenie ceny nehnuteľnosti za predpokladu, že \((V_6, V_{13})'=(5,20)'\)?
(mu <- apply(cbind(V6, V13, V14), 2, mean))
## V6 V13 V14
## 6.284634 12.653063 22.532806
(Sigma <- cov(cbind(V6, V13, V14)))
## V6 V13 V14
## V6 0.4936709 -3.079741 4.493446
## V13 -3.0797414 50.994760 -48.447538
## V14 4.4934459 -48.447538 84.586724
(cmean <- mu[3] + Sigma[3, 1:2] %*% solve(Sigma[1:2,1:2]) %*% (c(5, 20) - mu[1:2]))
## [,1]
## [1,] 11.2685
(cSigma <- Sigma[3,3] - Sigma[3, 1:2] %*% solve(Sigma[1:2,1:2]) %*% Sigma[1:2, 3])
## [,1]
## [1,] 30.57289
plot(dnorm(x <- seq(0, 50, length = 500), mean = cmean, sd = sqrt(cSigma)) ~ x, xlab = "Price", ylab = "Conditional Density", type = "l", lwd = 2, col = "blue", xlim = c(0, 50), ylim = c(0,0.18))
lines(dnorm(x, mean(V14), sd(V14)) ~ x, lwd = 1)
legend(0, 0.175, legend = c("Conditional density V14|(5,20)", "Estimated density of V14"), col = c("blue", "black"))
Zostrojíme dvojrozmerný náhodný vektor \((X, Y)'\), ktorý nemá normálne rozdelenie, ale marginálne rozdelenia sú \(X, Y\sim N(0,1)\).
library(copula)
## Warning: package 'copula' was built under R version 3.6.1
frank <- frankCopula(dim = 2, param = 12)
myDist <- mvdc(copula=frank, margins=c("norm", "norm"), paramMargins=list(list(mean=0, sd=1), list(mean=0, sd=1)))
contour(myDist, dMvdc, xlim = c(-2, 2), ylim=c(-2, 2), col=rainbow(10))
x <- seq(-2,2,0.01)
m1 <- m2 <- dnorm(x, 0, 1)
lines(x, m1 - abs(min(x)))
lines(m2 - abs(min(x)), x)
clayton <- claytonCopula(dim = 2, param = 12)
myDist <- mvdc(copula=clayton, margins=c("norm", "norm"), paramMargins=list(list(mean=0, sd=1), list(mean=0, sd=1)))
contour(myDist, dMvdc, xlim = c(-2, 2), ylim=c(-2, 2), col=rainbow(10))
x <- seq(-2,2,0.01)
m1 <- m2 <- dnorm(x, 0, 1)
lines(x, m1 - abs(min(x)))
lines(m2 - abs(min(x)), x)
Pomocou balíka condMVNorm overte správnosť výpočtov podmienených rozdelení z teoretických cvičení.
V príklade s dátami Boston housing skúste meniť hodnoty, ktoré nadobúdajú premenné V6 a V13. Čo sa napríklad stane s podmienenou strednou hodnotou V14 za podmienky \((V_6, V_{13})'=(6,10)'\)?