# Course: "Cluster Analysis and Data Classification"
# Study programme: "Probability and Mathematical Statistics"
# Lecturer: Radoslav Harman, KAMS FMFI UK Bratislava
#
# Comparison of several classification methods
# Based on "Lab 4" from the book James, G. et al.: An Introduction to
# Statistical Learning, Springer 2013
# See http://www-bcf.usc.edu/~gareth/ISL/ and
# http://www-bcf.usc.edu/~gareth/ISL/Chapter%204%20Lab.txt
#
# The script is meant to be stepped through interactively: top-level
# expressions rely on auto-printing, and help() calls open the manual pages.
# Columns are always referenced through the data frame (no attach()), so the
# script does not depend on, or pollute, the search path.

library(ISLR)   # Smarket data set
library(MASS)   # lda(), qda()
library(class)  # knn()

# Plotting symbol for each test observation: 19 (solid circle) where the
# predicted class equals the true class, 21 (open circle) where it does not.
# Labels are compared as strings, so the result does not depend on how the
# factor levels of `pred` happen to be coded.
error.pch <- function(pred, truth) {
  ifelse(as.character(pred) == as.character(truth), 19, 21)
}

# Data overview ----

head(Smarket, 10)
dim(Smarket)
# Columns 4-6 are left out to keep the scatterplot matrix readable.
pairs(Smarket[, -(4:6)], pch = 19, cex = 0.1)
# Column 9 (Direction) is the class label, not numeric, so it is excluded.
cor(Smarket[, -9])

# Logistic Regression ----

help(glm)

# Fit on the full data set and evaluate on the same data (training accuracy).
glm.fit <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
               data = Smarket, family = binomial)
summary(glm.fit)
coef(glm.fit)
glm.fit$coefficients
summary(glm.fit)$coefficients

glm.probs <- predict(glm.fit, type = "response")
plot(glm.probs, pch = 19)
# Classify as "Up" when the fitted probability exceeds 0.5; the vector length
# follows the number of fitted probabilities instead of a hard-coded count.
glm.pred <- rep("Down", length(glm.probs))
glm.pred[glm.probs > 0.5] <- "Up"
table(glm.pred, Direction = Smarket$Direction)
mean(glm.pred == Smarket$Direction)

# Hold-out evaluation: train on the years before 2005, test on the rest.
train <- Smarket$Year < 2005
Smarket.2005 <- Smarket[!train, ]
dim(Smarket.2005)
Direction.2005 <- Smarket.2005$Direction

glm.fit <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
               data = Smarket, family = binomial, subset = train)
summary(glm.fit)
glm.probs <- predict(glm.fit, newdata = Smarket.2005, type = "response")
glm.pred <- rep("Down", nrow(Smarket.2005))
glm.pred[glm.probs > 0.5] <- "Up"
table(glm.pred, Direction.2005)
mean(glm.pred == Direction.2005)

# Smaller model using only the first two lags.
glm.fit <- glm(Direction ~ Lag1 + Lag2,
               data = Smarket, family = binomial, subset = train)
glm.probs <- predict(glm.fit, Smarket.2005, type = "response")
glm.pred <- rep("Down", nrow(Smarket.2005))
glm.pred[glm.probs > 0.5] <- "Up"
table(glm.pred, Direction.2005)
mean(glm.pred == Direction.2005)

# Use the levels of the true labels so the factor keeps both classes even if
# every prediction falls into a single class.
glm.pred <- factor(glm.pred, levels = levels(Direction.2005))

# Test observations coloured by true class; in the second plot the symbol
# additionally marks misclassified points (see error.pch above).
plot(Smarket.2005$Lag1, Smarket.2005$Lag2,
     col = as.numeric(Direction.2005), xlab = "Lag1", ylab = "Lag2")
plot(Smarket.2005$Lag1, Smarket.2005$Lag2,
     col = as.numeric(Direction.2005),
     pch = error.pch(glm.pred, Direction.2005),
     xlab = "Lag1", ylab = "Lag2")

# Linear Discriminant Analysis ----

lda.fit <- lda(Direction ~ Lag1 + Lag2, data = Smarket, subset = train)
lda.fit
names(lda.fit)
# Average of the class means after projection onto the discriminant direction.
mean(lda.fit$means %*% lda.fit$scaling)

lda.pred <- predict(lda.fit, Smarket.2005)
names(lda.pred)
table(lda.pred$class, Direction.2005)
mean(lda.pred$class == Direction.2005)
# Posterior probability of the first class for each test observation.
plot(lda.pred$posterior[, 1])

# Quadratic Discriminant Analysis ----

help(qda)

qda.fit <- qda(Direction ~ Lag1 + Lag2, data = Smarket, subset = train)
qda.fit
names(qda.fit)

qda.pred <- predict(qda.fit, Smarket.2005)
names(qda.pred)
table(qda.pred$class, Direction.2005)
mean(qda.pred$class == Direction.2005)
plot(qda.pred$posterior[, 1])

plot(Smarket.2005$Lag1, Smarket.2005$Lag2,
     col = as.numeric(Direction.2005), xlab = "Lag1", ylab = "Lag2")
plot(Smarket.2005$Lag1, Smarket.2005$Lag2,
     col = as.numeric(Direction.2005),
     pch = error.pch(qda.pred$class, Direction.2005),
     xlab = "Lag1", ylab = "Lag2")

# K-Nearest Neighbors ----

help(knn)

# knn() breaks ties at random and the grid below is drawn with runif(), so
# fix the seed to make the whole section reproducible.
set.seed(1)

train.X <- as.matrix(Smarket[train, c("Lag1", "Lag2")])
test.X <- as.matrix(Smarket[!train, c("Lag1", "Lag2")])
train.Direction <- Smarket$Direction[train]

knn.pred <- knn(train.X, test.X, train.Direction, k = 1)
table(knn.pred, Direction.2005)
mean(knn.pred == Direction.2005)

knn.pred <- knn(train.X, test.X, train.Direction, k = 3)
table(knn.pred, Direction.2005)
mean(knn.pred == Direction.2005)

plot(Smarket.2005$Lag1, Smarket.2005$Lag2,
     col = as.numeric(Direction.2005), xlab = "Lag1", ylab = "Lag2")
plot(Smarket.2005$Lag1, Smarket.2005$Lag2,
     col = as.numeric(Direction.2005),
     pch = error.pch(knn.pred, Direction.2005),
     xlab = "Lag1", ylab = "Lag2")

# Visualise the KNN decision regions: classify many points drawn uniformly
# from the square [-2.5, 2.5] x [-2.5, 2.5] and colour them by predicted class.
n.points <- 10000
test.alot <- cbind(runif(n.points, min = -2.5, max = 2.5),
                   runif(n.points, min = -2.5, max = 2.5))

knn.pred <- knn(train.X, test.alot, train.Direction, k = 3)
plot(test.alot[, 1], test.alot[, 2],
     col = as.numeric(knn.pred), pch = 19, xlab = "Lag1", ylab = "Lag2")

knn.pred <- knn(train.X, test.alot, train.Direction, k = 101)
plot(test.alot[, 1], test.alot[, 2],
     col = as.numeric(knn.pred), pch = 19, xlab = "Lag1", ylab = "Lag2")