## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
knitr::opts_chunk$set(fig.align = "center")
knitr::opts_chunk$set(fig.width = 6, fig.height = 6) 

## ----preinstall---------------------------------------------------------------
library(calibrate)
library(ggplot2)
library(corrplot)
library(Correlplot)

## -----------------------------------------------------------------------------
data("Kernels")
X <- Kernels[Kernels$variety==1,]
X <- X[,-8]
head(X)

## -----------------------------------------------------------------------------
p <- ncol(X)
R <- cor(X)
round(R,digits=3)

## ----corrgram, fig.cap = "A corrgram of the wheat kernel data."---------------
corrplot(R, method="circle",type="lower")

## ----pnames, echo = FALSE-----------------------------------------------------
vnames <- substr(colnames(R),1,4)
colnames(R) <- rownames(R) <- vnames
colnames(X) <- vnames

## ----correlogram, fig.cap = "The correlogram of the wheat kernel data."-------
theta.cos <- correlogram(R,xlim=c(-1.1,1.1),ylim=c(-1.1,1.1),main="CRG")

## -----------------------------------------------------------------------------
Rhat.cor <- angleToR(theta.cos)

## -----------------------------------------------------------------------------
W1 <- matrix(1,p,p)
rmse.crg <- rmse(R,Rhat.cor,W=W1)
rmse.crg

## ----linearcorrelogram, fig.cap="Linear correlogram of the wheat kernel data."----
theta.lin <- correlogram(R,ifun="lincos",labs=colnames(R),xlim=c(-1.1,1.1),
                         ylim=c(-1.1,1.1),main="CRG")

## -----------------------------------------------------------------------------
Rhat.corlin <- angleToR(theta.lin,ifun="lincos")
rmse.lin <- rmse(R,Rhat.corlin,W=W1)
rmse.lin

## ----ggcorrelogram, fig.cap="Correlogram of the wheat kernel data on a ggplot2 canvas."----
set.seed(123)
crgR <- ggcorrelogram(R,main="CRG",vjust=c(0,2,-1,2,0,-1,2),
                     hjust=0)
crgR
crgR$theta

## ----pcabiplot, fig.cap = "PCA biplot of the (standardized) wheat kernel data."----
n <- nrow(X)
Xt <- scale(X)/sqrt(n-1)
res.svd <- svd(Xt)
Fs <- sqrt(n)*res.svd$u # standardized principal components
Gp <- res.svd$v%*%diag(res.svd$d) # biplot coordinates for variables
bplot(Fs,Gp,colch=NA,collab=colnames(X),xlab="PC1",ylab="PC2",main="PCA")

## ----talliedcorrelationcircle, fig.cap = "PCA biplot of the correlation matrix with correlation tally sticks."----
bplot(Gp,Gp,colch=NA,rowch=NA,collab=colnames(X),xl=c(-1,1),yl=c(-1,1),main="PCA")
circle()
tally(Gp[-6,1:2],values=seq(-0.2,0.8,by=0.2))

## -----------------------------------------------------------------------------
Rhat.pca <- Gp[,1:2]%*%t(Gp[,1:2])

## -----------------------------------------------------------------------------
rmse.pca <- rmse(R,Rhat.pca,W=W1)
rmse.pca

## -----------------------------------------------------------------------------
rmse(R,Rhat.pca,W=W1,per.variable=TRUE)

## -----------------------------------------------------------------------------
limits <- jointlim(Fs,Gp)
limits

## -----------------------------------------------------------------------------
df.rows <- data.frame(Fs[,1:2])
colnames(df.rows) <- c("PA1","PA2")
df.rows$strings <- 1:n

df.cols <- data.frame(Gp[,1:2])
colnames(df.cols) <- c("PA1","PA2")
df.cols$strings <- colnames(R)

## -----------------------------------------------------------------------------
lambda      <- res.svd$d^2
lambda.frac <- lambda/sum(lambda)
lambda.cum  <- cumsum(lambda.frac)
decom <- round(rbind(lambda,lambda.frac,lambda.cum),digits=3)
colnames(decom) <- paste("PC",1:p,sep="")
decom

## -----------------------------------------------------------------------------
xlab <- paste("PC1 (",round(100*lambda.frac[1],digits=1),"%)",sep="")
ylab <- paste("PC2 (",round(100*lambda.frac[2],digits=1),"%)",sep="")

## ----ggbiplot, fig.cap = "PCA biplot on a ggplot canvas"----------------------
biplotX <- ggbplot(df.rows,df.cols,main="PCA biplot",xlab=xlab,
              ylab=ylab,xlim=limits$xlim,ylim=limits$ylim,
              colch="")
biplotX

## -----------------------------------------------------------------------------
lambdasq      <- lambda^2
lambdasq.frac <- lambdasq/sum(lambdasq)
lambdasq.cum  <- cumsum(lambdasq.frac)
decomsq <- round(rbind(lambdasq,lambdasq.frac,lambdasq.cum),
                 digits=3)
colnames(decomsq) <- paste("PC",1:p,sep="")
decomsq

xlab <- paste("PC1 (",round(100*lambdasq.frac[1],digits=1),"%)",sep="")
ylab <- paste("PC2 (",round(100*lambdasq.frac[2],digits=1),"%)",sep="")


## ----anotherbiplot, fig.cap = "PCA Correlation biplot on a ggplot canvas."----
biplotR <- ggbplot(df.cols,df.cols,main="PCA Correlation biplot",xlab=xlab,
              ylab=ylab,xlim=c(-1,1),ylim=c(-1,1),
              rowarrow=TRUE,rowcolor="blue",colch="",
              rowch="")

biplotR

## ----yetanotherbiplot, fig.cap = "PCA Correlation biplot with correlation tally sticks."----
biplotR <- ggtally(Gp[-6,1:2],biplotR,values=seq(-0.2,0.8,by=0.2),dotsize=0.2)
biplotR <- ggtally(Gp[6,1:2],biplotR,values=seq(-0.01,0.01,by=0.01),dotsize=0.2)
biplotR

## ----mdsplot, fig.cap = "MDS map of the correlation matrix of the wheat kernel data."----
Di <- sqrt(2*(1-R))
out.mds <- cmdscale(Di,eig = TRUE)
Fp <- out.mds$points[,1:2]
opar <- par(bty = "l")
plot(Fp[,1],Fp[,2],asp=1,xlab="First principal axis",
     ylab="Second principal axis",main="MDS")
textxy(Fp[,1],Fp[,2],colnames(R),cex=0.75)
par(opar)

ii <- which(R < 0,arr.ind = TRUE)

for(i in 1:nrow(ii)) {
  segments(Fp[ii[i,1],1],Fp[ii[i,1],2],
           Fp[ii[i,2],1],Fp[ii[i,2],2],col="red",lty="dashed")
}

## -----------------------------------------------------------------------------
Dest <- as.matrix(dist(Fp[,1:2]))
Rhat.mds <- 1-0.5*Dest*Dest

## -----------------------------------------------------------------------------
rmse.mds <- rmse(R,Rhat.mds,W=W1)
rmse.mds

## ----mdsggplot, fig.cap = "MDS map on ggplot canvas."-------------------------
colnames(Fp) <- paste("PA",1:2,sep="")
rownames(Fp) <- as.character(1:nrow(Fp))
Fp <- data.frame(Fp)
Fp$strings <- colnames(R)
MDSmap <- ggplot(Fp, aes(x = PA1, y = PA2)) + 
            coord_equal(xlim = c(-1,1), ylim = c(-1,1)) +
            ggtitle("MDS") +
            xlab("First principal axis") + ylab("Second principal axis") +
            theme(plot.title = element_text(hjust = 0.5, 
                                            size = 8),
                  axis.ticks = element_blank(),
                  axis.text.x = element_blank(),
                  axis.text.y = element_blank())
MDSmap <- MDSmap + geom_point(data = Fp, aes(x = PA1, y = PA2), 
                        colour = "black", shape = 1)
MDSmap <- MDSmap + geom_text(data = Fp, aes(label = strings), 
                        size = 3, alpha = 1,
                      vjust = 2) 

Z <- matrix(NA,nrow=nrow(ii),ncol=4)
for(i in 1:nrow(ii)) {
  Z[i,] <- c(Fp[ii[i,1],1],Fp[ii[i,1],2],Fp[ii[i,2],1],Fp[ii[i,2],2])
}
colnames(Z) <- c("X1","Y1","X2","Y2")
Z <- data.frame(Z)

MDSmap <- MDSmap + geom_segment(data=Z,aes(x=X1,y=Y1,
                                           xend=X2,yend=Y2),
                         alpha=0.75,color="red",linetype=2)   
MDSmap

## ----pfa----------------------------------------------------------------------
out.pfa <- Correlplot::pfa(X,verbose=FALSE)
L <- out.pfa$La

## -----------------------------------------------------------------------------
Rhat.pfa <- L[,1:2]%*%t(L[,1:2])
rmse.pfa <- rmse(R,Rhat.pfa)
rmse.pfa

## ----pfabiplot, fig.cap = "PFA biplot of the correlation matrix of the wheat kernel data."----
bplot(L,L,pch=NA,xl=c(-1,1),yl=c(-1,1),xlab="Factor 1",ylab="Factor 2",main="PFA",rowch=NA,
      colch=NA)
circle()

## -----------------------------------------------------------------------------
diag(out.pfa$Psi)

## ----pfaggbiplot, fig.cap = "PFA biplot on a ggplot canvas."------------------
L.df <- data.frame(L,rownames(L))
colnames(L.df) <- c("PA1","PA2","strings")
ggbplot(L.df,L.df,xlab="Factor 1",ylab="Factor 2",main="PFA biplot",
        rowarrow=TRUE,rowcolor="blue",colch="",rowch="")

## -----------------------------------------------------------------------------
Wdiag0 <- matrix(1,nrow(R),nrow(R))
diag(Wdiag0) <- 0
Fp.als <- ipSymLS(R,w=Wdiag0,eps=1e-15)

## ----ipsymlsplot, fig.cap = "WALS biplot of the correlation matrix of the wheat kernel data."----
bplot(Fp.als,Fp.als,rowch=NA,colch=NA,collab=colnames(R),
      xl=c(-1.1,1.1),yl=c(-1.1,1.1),main="WALS")
circle()

## -----------------------------------------------------------------------------
Rhat.wals <- Fp.als%*%t(Fp.als)
sqrt(diag(Rhat.wals))
rmse.als <- rmse(R,Rhat.wals)
rmse.als

## -----------------------------------------------------------------------------
out.wals <- wAddPCA(R, Wdiag0, add = "nul", verboseout = FALSE, epsout = 1e-10)
Rhat.wals <- out.wals$a%*%t(out.wals$b)
out.eig <- eigen(Rhat.wals)
Fp.adj <- out.eig$vectors[,1:2]%*%diag(sqrt(out.eig$values[1:2]))
rmse.als <- rmse(R,Rhat.wals)
rmse.als

## ----ipsymlsggplot, fig.cap = "WALS biplot of the correlation matrix of the wheat kernel data."----
Fp.als.df <- data.frame(Fp.als,colnames(R))
colnames(Fp.als.df) <- c("PA1","PA2","strings")
ggbplot(Fp.als.df,Fp.als.df,xlab="Dimension 1",ylab="Dimension 2",main="WALS biplot",
        rowarrow=TRUE,rowcolor="blue",colch="",rowch="")

## -----------------------------------------------------------------------------
out.wals <- wAddPCA(R, Wdiag0, add = "one", verboseout = FALSE, epsout = 1e-10)
delta <- out.wals$delta[1,1]
Rhat <- out.wals$a%*%t(out.wals$b)
out.eig <- eigen(Rhat)
Fp.adj <- out.eig$vectors[,1:2]%*%diag(sqrt(out.eig$values[1:2]))

## ----walsbiplot, fig.cap = "WALS biplot of the correlation matrix of the wheat kernel data, with the use of an additive adjustment."----
bplot(Fp.adj,Fp.adj,rowch=NA,colch=NA,collab=colnames(R),
      xl=c(-1.2,1.2),yl=c(-1.2,1.2),main="WALS adjusted")
circle()

## -----------------------------------------------------------------------------
Rhat.adj <- Fp.adj%*%t(Fp.adj)+delta
rmse.adj <- rmse(R,Rhat.adj)
rmse.adj

## -----------------------------------------------------------------------------
rmsevector <- c(rmse.crg,rmse.lin,rmse.pca,rmse.mds,rmse.pfa,rmse.als,rmse.adj)
methods <- c("CRG (cos)","CRG (lin)","PCA","MDS",
"PFA","WALS R","WALS Radj")
results <- data.frame(methods,rmsevector)
results <- results[order(rmsevector),]
results

## -----------------------------------------------------------------------------
output <- FitRwithPCAandWALS(R,eps=1e-15)
rmsePCAandWALS(R,output)

## ----walsggbiplot, fig.cap = "WALS biplot of the correlation matrix of the wheat kernel data, with additive adjustment and tally sticks."----
Fp.adj.df <- data.frame(Fp.adj,colnames(R))
colnames(Fp.adj) <- c("PA1","PA2")
colnames(Fp.adj.df) <- c("PA1","PA2","strings")
WALSbiplot.adj <- ggbplot(Fp.adj.df,Fp.adj.df,xlab="Dimension 1",ylab="Dimension 2",
                          main="WALS adjusted",rowarrow=TRUE,
                          rowcolor = "blue",rowch="",colch="")
WALSbiplot.adj <- ggtally(Fp.adj[-6,1:2],WALSbiplot.adj,
                          adj=-out.wals$delta[1,1],
                   values=seq(-0.2,0.8,by=0.2),dotsize=0.2)
WALSbiplot.adj

## -----------------------------------------------------------------------------
out.walsq.sym <- FitRDeltaQSym(R,Wdiag0,itmax.inner=30000,itmax.outer=30000,
                     eps=1e-8)
Gq.sym <- out.walsq.sym$C
rownames(Gq.sym) <- colnames(R)
Rhat.wsym <- out.walsq.sym$Rhat
rmse.wsym <- rmse(R,Rhat.wsym)
rmse.wsym

## ----newplot, fig.cap = "WALS biplot of the correlation matrix with column adjustment and tally sticks."----
Gq.sym.df <- data.frame(Gq.sym)
Gq.sym.df$strings <- colnames(R)
colnames(Gq.sym.df) <- c("PA1","PA2","strings")
ll <- 1.5
WALSbiplotq.sym <- ggbplot(Gq.sym.df,Gq.sym.df,main="WALS-q-sym biplot",xlab="Dimension 1",
                           ylab="Dimension 2",
                           ylim=c(-ll,ll),xlim=c(-ll,ll),circle=TRUE,
                           rowarrow=TRUE,rowcolor="blue",rowch="",
                           colch="")
for(i in c(1:5,7)) {
   WALSbiplotq.sym <- ggtally(Gq.sym[i,1:2],WALSbiplotq.sym,
                              adj=-out.walsq.sym$delta-out.walsq.sym$q[i], 
              values=seq(-0.2,1,by=0.2),dotsize=0.2)  
}
WALSbiplotq.sym

## -----------------------------------------------------------------------------
out.walsq.sym$delta + out.walsq.sym$q