Kopuly

library(copula)
## Warning: package 'copula' was built under R version 3.6.1
library(scatterplot3d)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(grid)

Frechetove hranice

Generovanie kopuly so známymi parametrami

Dvojrozmerná gaussovská kopula s \(\rho = 0.7\)

(normal <- normalCopula(param = 0.7, dim = 2))
## Normal copula, dim. d = 2 
## Dimension:  2 
## Parameters:
##   rho.1   = 0.7

Dvojrozmerná t-kopula s \(\rho = 0.8\) a df = 2

(stc <- tCopula(param = 0.8, dim = 2, df = 2))
## t-copula, dim. d = 2 
## Dimension:  2 
## Parameters:
##   rho.1   = 0.8
##   df      = 2.0

Ďalšie Archimedovské kopuly

frank <- frankCopula(dim = 2, param = 8)
gumbel <- gumbelCopula(dim = 3, param = 5.6)
clayton <- claytonCopula(dim = 4, param = 19)

Generovanie viacrozmerných rozdelení pomocou kopulí

Claytonova kopula

cp <- claytonCopula(param = c(3.4), dim = 2)

Vygenerujeme viacrozmerné rozdelenie s normálnym a studentovým marginálnym rozdelením pomocou príkazu mvdc.

m_dist <- mvdc(copula = cp,  margins = c("norm", "t"), paramMargins = list(list(mean = 2, sd=3),
                list(df = 2)) )

print(m_dist)
## Multivariate Distribution Copula based ("mvdc")
##  @ copula:
## Clayton copula, dim. d = 2 
## Dimension:  2 
## Parameters:
##   alpha   = 3.4
##  @ margins:
## [1] "norm" "t"   
##    with 2 (not identical)  margins; with parameters (@ paramMargins) 
## List of 2
##  $ :List of 2
##   ..$ mean: num 2
##   ..$ sd  : num 3
##  $ :List of 1
##   ..$ df: num 2

Teraz môžeme používať dMvdc, pMvdc a rMvdc na distribučnú funkciu, hustotu a náhodný výber z tohto viacrozmerného rozdelenia. Napríklad

dMvdc(c(1,2),m_dist)
## [1] 0.00189003
persp  (m_dist, dMvdc, xlim = c(-4, 4), ylim=c(0, 2))

Generovanie náhodného výberu z kopuly

Vrátime sa ku kopulám vygenerovaným vyššie a vytvoríme náhodné výbery rozsahu 2000:

fr <- rCopula(2000, frank)
gu <- rCopula(2000, gumbel)
cl <- rCopula(2000, clayton)

head(fr)
##           [,1]       [,2]
## [1,] 0.9837336 0.86721208
## [2,] 0.7932888 0.59433157
## [3,] 0.6072518 0.78637924
## [4,] 0.1209396 0.38243332
## [5,] 0.2499018 0.01883573
## [6,] 0.5214747 0.50192946

Vykreslenie (marginálne rozdelenia sú rovnomerné)

qplot(fr[,1], fr[,2], colour = fr[,1], main="Frank copula random samples theta = 8", xlab = "u", ylab = "v")

qplot(gu[,1], gu[,2], colour = gu[,1], main="Gumbel copula random samples theta = 5.6", xlab = "u", ylab = "v") 

qplot(cl[,1], cl[,2], colour = cl[,1], main="Clayton copula random samples theta = 19", xlab = "u", ylab = "v")

Pre nami vytvorené rozdelenie m_dist:

samples <- rMvdc(2000, m_dist)
qplot(samples[,1], samples[,2], colour = samples[,1])

Hustota a distribučná funkcia

Vygenerujeme normálnu kopulu a 2000 pozorovaní

coef <- 0.8
mycopula <- normalCopula(coef, dim = 2)
u <- rCopula(2000, mycopula)

Hustota

pdf <- dCopula(u, mycopula)

Distribučná funkcia

cdf <- pCopula(u, mycopula)

Grafické znázornenie hustoty

par(mfrow=c(1,3))
scatterplot3d(u[,1], u[,2], pdf, color="red", main="Density", xlab ="u1", ylab="u2", zlab="dCopula", pch=16)
persp(mycopula, dCopula, main ="Density")
contour(mycopula, dCopula, xlim = c(0, 1), ylim=c(0, 1), main = "Contour plot")

Grafické znázornenie distribučnej funkcie

par(mfrow=c(1,3))
scatterplot3d(u[,1], u[,2], cdf, color="red", main="CDF", xlab = "u1", ylab="u2", zlab="pCopula",pch=16)
persp(mycopula, pCopula, main = "CDF")
contour(mycopula, pCopula, xlim = c(0, 1), ylim=c(0, 1), main = "Contour plot")

To isté môžeme urobiť aj pre naše viacrozmerné rozdelenie m_dist

v <- rMvdc(2000, m_dist)

Hustota

pdf_mvd <- dMvdc(v, m_dist)

Distribučná funkcia

cdf_mvd <- pMvdc(v, m_dist)

Grafické znázornenie

par(mfrow=c(1,3))
scatterplot3d(v[,1],v[,2], pdf_mvd, color="red", main="Density", xlab = "u1", ylab="u2", zlab="pMvdc",pch=16)
persp(m_dist, dMvdc, xlim = c(-10, 10), ylim=c(-4, 4), main = "Density")
contour(m_dist, dMvdc, xlim = c(-10, 10), ylim=c(-4, 4), main = "Contour plot")

par(mfrow=c(1,3))
scatterplot3d(v[,1],v[,2], cdf_mvd, color="red", main="CDF", xlab = "u1", ylab="u2", zlab="pMvdc",pch=16)
contour(m_dist, pMvdc, xlim = c(-20, 20), ylim=c(-5, 5), main = "Contour plot")
persp(m_dist, pMvdc, xlim = c(-20, 20), ylim=c(-5, 5), main = "CDF")

Vizuálne porovnanie hustoty a distribučnej funkcie najčastejšie používaných archimedovských kopúl

frank <- frankCopula(dim = 2, param = 3)
clayton <- claytonCopula(dim = 2, param = 1.2)
gumbel <- gumbelCopula(dim = 2, param = 1.5)

par(mfrow=c(1,3))
persp(frank, dCopula)
persp(clayton, dCopula)
persp(gumbel, dCopula)

par(mfrow=c(1,3))
contour(frank, dCopula, xlim = c(0, 1), ylim=c(0, 1))
contour(clayton, dCopula, xlim = c(0, 1), ylim=c(0, 1))
contour(gumbel, dCopula, xlim = c(0, 1), ylim=c(0, 1))

Príklad: výnosy

cree <- read.csv('http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/cree_r.csv', header=FALSE)$V2
yahoo <- read.csv('http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/yahoo_r.csv', header=FALSE)$V2

Korelácia a regresná priamka

plot(cree,yahoo,pch='.')
abline(lm(yahoo~cree),col='red',lwd=1)

cor(cree,yahoo,method='spearman')
## [1] 0.4023584

Akú kopulu použiť? Normálna nemusí byť v tomto prípade vhodná. Nájdeme vhodnú kopulu pomocou balíka VineCopula.

library(VineCopula)
## Warning: package 'VineCopula' was built under R version 3.6.1
u <- pobs(as.matrix(cbind(cree,yahoo)))[,1]
v <- pobs(as.matrix(cbind(cree,yahoo)))[,2]
selectedCopula <- BiCopSelect(u,v,familyset=NA)
selectedCopula
## Bivariate copula: t (par = 0.44, par2 = 3.84, tau = 0.29)

Skúsime, či to sedí s tým, čo dostaneme z tCopula v balíku copula

t.cop <- tCopula(dim=2)
set.seed(500)
m <- pobs(as.matrix(cbind(cree,yahoo)))
fit <- fitCopula(t.cop,m,method='ml')
coef(fit)
##    rho.1       df 
## 0.435630 3.844527

Hustota odhadnutej kopuly

rho <- coef(fit)[1]
df <- coef(fit)[2]
persp(tCopula(dim=2,rho,df=df),dCopula)

Kopulu skonštruujeme a vygenerujeme z nej pozorovania. Všimnime si, že vygenerované pozorovania majú rovnakú koreláciu ako dáta.

u <- rCopula(3965,tCopula(dim=2,rho,df=df))
plot(u[,1],u[,2],pch='.',col='blue')

cor(u,method='spearman')
##           [,1]      [,2]
## [1,] 1.0000000 0.3839496
## [2,] 0.3839496 1.0000000

Ešte potrebujeme modelovať marginálne rozdelenia. Odhadneme ich parametre:

cree_mu <- mean(cree)
cree_sd <- sd(cree)
yahoo_mu <- mean(yahoo)
yahoo_sd <- sd(yahoo)

Pozrieme sa, ako sa rozdelenia spravajú

par(mfrow=c(1,2))
hist(cree,breaks=80,main='Cree returns',freq=FALSE,density=30,col='cyan',ylim=c(0,20),xlim=c(-0.2,0.3))
lines(seq(-0.5,0.5,0.01),dnorm(seq(-0.5,0.5,0.01),cree_mu,cree_sd),col='red',lwd=2)
legend('topright',c('Fitted normal'),col=c('red'),lwd=2)
hist(yahoo,breaks=80,main='Yahoo returns',density=30,col='cyan',freq=FALSE,ylim=c(0,20),xlim=c(-0.2,0.2))
lines(seq(-0.5,0.5,0.01),dnorm(seq(-0.5,0.5,0.01),yahoo_mu,yahoo_sd),col='red',lwd=2)
legend('topright',c('Fitted normal'),col=c('red'),lwd=2)

Nasimulujeme pozorovania z vygenerovaného viacrozmerného rozdelenia

copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=df), margins=c("norm","norm"), paramMargins=list(list(mean=cree_mu, sd=cree_sd), list(mean=yahoo_mu, sd=yahoo_sd)))
sim <- rMvdc(3965, copula_dist)

Urobíme vizuálne porovnanie: dostali sme sa dostatočne blízko k reálnym dátam. Máme ale odľahlé pozorovania, ktoré náš model nepostihuje

plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated'),col=c('black','red'),pch=21)

Ako sa môžu zmeniť nasimulované dáta, ak zmeníme parameter kopuly?

Korelačný koeficient ostáva veľmi podobný

set.seed(4258)
par(mfrow=c(1,2))
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=1), margins=c("norm","norm"),
                    paramMargins=list(list(mean=cree_mu, sd=cree_sd),
                                      list(mean=yahoo_mu, sd=yahoo_sd)))
sim <- rMvdc(3965, copula_dist)
plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated df=1'),col=c('black','red'),pch=21)
cor(sim[,1],sim[,2],method='spearman')
## [1] 0.3929509
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=8), margins=c("norm","norm"),
                    paramMargins=list(list(mean=cree_mu, sd=cree_sd),
                                      list(mean=yahoo_mu, sd=yahoo_sd)))
sim <- rMvdc(3965, copula_dist)
plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated df=8'),col=c('black','red'),pch=21)

cor(sim[,1],sim[,2],method='spearman')
## [1] 0.4412581