library(copula)
## Warning: package 'copula' was built under R version 3.6.1
library(scatterplot3d)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(grid)
Dvojrozmerná gaussovská kopula s \(\rho = 0.7\)
(normal <- normalCopula(param = 0.7, dim = 2))
## Normal copula, dim. d = 2
## Dimension: 2
## Parameters:
## rho.1 = 0.7
Dvojrozmerná t-kopula s \(\rho = 0.8\) a df = 2
(stc <- tCopula(param = 0.8, dim = 2, df = 2))
## t-copula, dim. d = 2
## Dimension: 2
## Parameters:
## rho.1 = 0.8
## df = 2.0
Ďalšie Archimedovské kopuly
frank <- frankCopula(dim = 2, param = 8)
gumbel <- gumbelCopula(dim = 3, param = 5.6)
clayton <- claytonCopula(dim = 4, param = 19)
Claytonova kopula
cp <- claytonCopula(param = c(3.4), dim = 2)
Vygenerujeme viacrozmerné rozdelenie s normálnym a studentovým marginálnym rozdelením pomocou príkazu mvdc.
m_dist <- mvdc(copula = cp, margins = c("norm", "t"), paramMargins = list(list(mean = 2, sd=3),
list(df = 2)) )
print(m_dist)
## Multivariate Distribution Copula based ("mvdc")
## @ copula:
## Clayton copula, dim. d = 2
## Dimension: 2
## Parameters:
## alpha = 3.4
## @ margins:
## [1] "norm" "t"
## with 2 (not identical) margins; with parameters (@ paramMargins)
## List of 2
## $ :List of 2
## ..$ mean: num 2
## ..$ sd : num 3
## $ :List of 1
## ..$ df: num 2
Teraz môžeme používať dMvdc, pMvdc a rMvdc na distribučnú funkciu, hustotu a náhodný výber z tohto viacrozmerného rozdelenia. Napríklad
dMvdc(c(1,2),m_dist)
## [1] 0.00189003
persp (m_dist, dMvdc, xlim = c(-4, 4), ylim=c(0, 2))
Vrátime sa ku kopulám vygenerovaným vyššie a vytvoríme náhodné výbery rozsahu 2000:
fr <- rCopula(2000, frank)
gu <- rCopula(2000, gumbel)
cl <- rCopula(2000, clayton)
head(fr)
## [,1] [,2]
## [1,] 0.9837336 0.86721208
## [2,] 0.7932888 0.59433157
## [3,] 0.6072518 0.78637924
## [4,] 0.1209396 0.38243332
## [5,] 0.2499018 0.01883573
## [6,] 0.5214747 0.50192946
Vykreslenie (marginálne rozdelenia sú rovnomerné)
qplot(fr[,1], fr[,2], colour = fr[,1], main="Frank copula random samples theta = 8", xlab = "u", ylab = "v")
qplot(gu[,1], gu[,2], colour = gu[,1], main="Gumbel copula random samples theta = 5.6", xlab = "u", ylab = "v")
qplot(cl[,1], cl[,2], colour = cl[,1], main="Clayton copula random samples theta = 19", xlab = "u", ylab = "v")
Pre nami vytvorené rozdelenie m_dist:
samples <- rMvdc(2000, m_dist)
qplot(samples[,1], samples[,2], colour = samples[,1])
Vygenerujeme normálnu kopulu a 2000 pozorovaní
coef <- 0.8
mycopula <- normalCopula(coef, dim = 2)
u <- rCopula(2000, mycopula)
Hustota
pdf <- dCopula(u, mycopula)
Distribučná funkcia
cdf <- pCopula(u, mycopula)
Grafické znázornenie hustoty
par(mfrow=c(1,3))
scatterplot3d(u[,1], u[,2], pdf, color="red", main="Density", xlab ="u1", ylab="u2", zlab="dCopula", pch=16)
persp(mycopula, dCopula, main ="Density")
contour(mycopula, dCopula, xlim = c(0, 1), ylim=c(0, 1), main = "Contour plot")
Grafické znázornenie distribučnej funkcie
par(mfrow=c(1,3))
scatterplot3d(u[,1], u[,2], cdf, color="red", main="CDF", xlab = "u1", ylab="u2", zlab="pCopula",pch=16)
persp(mycopula, pCopula, main = "CDF")
contour(mycopula, pCopula, xlim = c(0, 1), ylim=c(0, 1), main = "Contour plot")
To isté môžeme urobiť aj pre naše viacrozmerné rozdelenie m_dist
v <- rMvdc(2000, m_dist)
Hustota
pdf_mvd <- dMvdc(v, m_dist)
Distribučná funkcia
cdf_mvd <- pMvdc(v, m_dist)
Grafické znázornenie
par(mfrow=c(1,3))
scatterplot3d(v[,1],v[,2], pdf_mvd, color="red", main="Density", xlab = "u1", ylab="u2", zlab="pMvdc",pch=16)
persp(m_dist, dMvdc, xlim = c(-10, 10), ylim=c(-4, 4), main = "Density")
contour(m_dist, dMvdc, xlim = c(-10, 10), ylim=c(-4, 4), main = "Contour plot")
par(mfrow=c(1,3))
scatterplot3d(v[,1],v[,2], cdf_mvd, color="red", main="CDF", xlab = "u1", ylab="u2", zlab="pMvdc",pch=16)
contour(m_dist, pMvdc, xlim = c(-20, 20), ylim=c(-5, 5), main = "Contour plot")
persp(m_dist, pMvdc, xlim = c(-20, 20), ylim=c(-5, 5), main = "CDF")
Vizuálne porovnanie hustoty a distribučnej funkcie najčastejšie používaných archimedovských kopúl
frank <- frankCopula(dim = 2, param = 3)
clayton <- claytonCopula(dim = 2, param = 1.2)
gumbel <- gumbelCopula(dim = 2, param = 1.5)
par(mfrow=c(1,3))
persp(frank, dCopula)
persp(clayton, dCopula)
persp(gumbel, dCopula)
par(mfrow=c(1,3))
contour(frank, dCopula, xlim = c(0, 1), ylim=c(0, 1))
contour(clayton, dCopula, xlim = c(0, 1), ylim=c(0, 1))
contour(gumbel, dCopula, xlim = c(0, 1), ylim=c(0, 1))
cree <- read.csv('http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/cree_r.csv', header=FALSE)$V2
yahoo <- read.csv('http://www.iam.fmph.uniba.sk/ospm/Filova/vsa/yahoo_r.csv', header=FALSE)$V2
Korelácia a regresná priamka
plot(cree,yahoo,pch='.')
abline(lm(yahoo~cree),col='red',lwd=1)
cor(cree,yahoo,method='spearman')
## [1] 0.4023584
Akú kopulu použiť? Normálna nemusí byť v tomto prípade vhodná. Nájdeme vhodnú kopulu pomocou balíka VineCopula.
library(VineCopula)
## Warning: package 'VineCopula' was built under R version 3.6.1
u <- pobs(as.matrix(cbind(cree,yahoo)))[,1]
v <- pobs(as.matrix(cbind(cree,yahoo)))[,2]
selectedCopula <- BiCopSelect(u,v,familyset=NA)
selectedCopula
## Bivariate copula: t (par = 0.44, par2 = 3.84, tau = 0.29)
Skúsime, či to sedí s tým, čo dostaneme z tCopula v balíku copula
t.cop <- tCopula(dim=2)
set.seed(500)
m <- pobs(as.matrix(cbind(cree,yahoo)))
fit <- fitCopula(t.cop,m,method='ml')
coef(fit)
## rho.1 df
## 0.435630 3.844527
Hustota odhadnutej kopuly
rho <- coef(fit)[1]
df <- coef(fit)[2]
persp(tCopula(dim=2,rho,df=df),dCopula)
Kopulu skonštruujeme a vygenerujeme z nej pozorovania. Všimnime si, že vygenerované pozorovania majú rovnakú koreláciu ako dáta.
u <- rCopula(3965,tCopula(dim=2,rho,df=df))
plot(u[,1],u[,2],pch='.',col='blue')
cor(u,method='spearman')
## [,1] [,2]
## [1,] 1.0000000 0.3839496
## [2,] 0.3839496 1.0000000
Ešte potrebujeme modelovať marginálne rozdelenia. Odhadneme ich parametre:
cree_mu <- mean(cree)
cree_sd <- sd(cree)
yahoo_mu <- mean(yahoo)
yahoo_sd <- sd(yahoo)
Pozrieme sa, ako sa rozdelenia spravajú
par(mfrow=c(1,2))
hist(cree,breaks=80,main='Cree returns',freq=FALSE,density=30,col='cyan',ylim=c(0,20),xlim=c(-0.2,0.3))
lines(seq(-0.5,0.5,0.01),dnorm(seq(-0.5,0.5,0.01),cree_mu,cree_sd),col='red',lwd=2)
legend('topright',c('Fitted normal'),col=c('red'),lwd=2)
hist(yahoo,breaks=80,main='Yahoo returns',density=30,col='cyan',freq=FALSE,ylim=c(0,20),xlim=c(-0.2,0.2))
lines(seq(-0.5,0.5,0.01),dnorm(seq(-0.5,0.5,0.01),yahoo_mu,yahoo_sd),col='red',lwd=2)
legend('topright',c('Fitted normal'),col=c('red'),lwd=2)
Nasimulujeme pozorovania z vygenerovaného viacrozmerného rozdelenia
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=df), margins=c("norm","norm"), paramMargins=list(list(mean=cree_mu, sd=cree_sd), list(mean=yahoo_mu, sd=yahoo_sd)))
sim <- rMvdc(3965, copula_dist)
Urobíme vizuálne porovnanie: dostali sme sa dostatočne blízko k reálnym dátam. Máme ale odľahlé pozorovania, ktoré náš model nepostihuje
plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated'),col=c('black','red'),pch=21)
Ako sa môžu zmeniť nasimulované dáta, ak zmeníme parameter kopuly?
Korelačný koeficient ostáva veľmi podobný
set.seed(4258)
par(mfrow=c(1,2))
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=1), margins=c("norm","norm"),
paramMargins=list(list(mean=cree_mu, sd=cree_sd),
list(mean=yahoo_mu, sd=yahoo_sd)))
sim <- rMvdc(3965, copula_dist)
plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated df=1'),col=c('black','red'),pch=21)
cor(sim[,1],sim[,2],method='spearman')
## [1] 0.3929509
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=8), margins=c("norm","norm"),
paramMargins=list(list(mean=cree_mu, sd=cree_sd),
list(mean=yahoo_mu, sd=yahoo_sd)))
sim <- rMvdc(3965, copula_dist)
plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated df=8'),col=c('black','red'),pch=21)
cor(sim[,1],sim[,2],method='spearman')
## [1] 0.4412581