---
title: "Predictor Identifier: Nonparametric PREDiction"
author: 'Ashish Sharma, Raj Mehrotra, Sanjeev Jha, Jingwan Li and Ze Jiang'
date: "`r format(Sys.time(), '%H:%M:%S %d %B, %Y')`"
output: 
#  rmarkdown::html_vignette: 
  bookdown::html_vignette2: 
    toc: true
    number_sections: true
vignette: >
  %\VignetteEncoding{UTF-8}
  %\VignetteIndexEntry{Predictor Identifier: Nonparametric PREDiction}
  %\VignetteEngine{knitr::rmarkdown}
editor_options: 
  chunk_output_type: console
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  echo = TRUE, collapse = TRUE, comment = "#>", warning = FALSE,
  
  out.width='100%',
  fig.align = "center", fig.pos = "h!"
)
```

```{r setup}
library(NPRED)
require(zoo)
require(ggplot2)
```

# Synthetic data generation

```{r dat, fig.cap='Example of AR models', fig.height=5, fig.width=9}
# Other statistical models for generating synthetic data see Package - synthesis
require(synthesis)

set.seed(2020) # set seed for reproducible results
# AR1 model from paper with 9 dummy variables
data.ar1 <- synthesis::data.gen.ar1(500)

# AR4 model from paper with total 9 dimensions
data.ar4 <- synthesis::data.gen.ar4(500)

# AR9 model from paper with total 9 dimensions
data.ar9 <- synthesis::data.gen.ar9(500)

# plot.zoo(cbind(data.ar1$x, data.ar4$x, data.ar9$x),
#   xlab = NA, main = "Example of AR models",
#   ylab = c("AR1", "AR4", "AR9"))
```


# Partial informational correlation (PIC)

```{r pic}
# calculate PIC
pic.calc(data.ar1$x, data.ar1$dp)

pic.calc(data.ar4$x, data.ar4$dp)

pic.calc(data.ar9$x, data.ar9$dp)

```

# Predictor identifier: stepwise.PIC

```{r fig, fig.cap='Example of PIC selection implemented in NPRED', fig.height=5, fig.width=9}
pic1 <- stepwise.PIC(data.ar1$x, data.ar1$dp)

pic4 <- stepwise.PIC(data.ar4$x, data.ar4$dp)

pic9 <- stepwise.PIC(data.ar9$x, data.ar9$dp)

```

# Partial weights (PW)

```{r pw}
# calculate partial weights
pw.calc(data.ar1$x, data.ar1$dp, pic1$cpy, pic1$cpyPIC)

pw.calc(data.ar4$x, data.ar4$dp, pic4$cpy, pic4$cpyPIC)

pw.calc(data.ar9$x, data.ar9$dp, pic9$cpy, pic9$cpyPIC)

```

# Nonparameteric prediction: knn

```{r knn, fig.cap='Example of KNN implemented in NPRED', fig.height=5,fig.width=9}
data("data3")
x <- ts(data3[, 1]) # response
z <- ts(data3[, -1]) # possible predictors
zout <- ts(data.gen.ar1(500, ndim = 15)$dp) # new input

xhat1 <- xhat2 <- x
# xhat1 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=F)
# xhat2 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=T)

for (i in 1:500) {
  xhat1[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = F)
  xhat2[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = T)
}

if (TRUE) {
  par(mfrow = c(1, 1),
      mar=c(3,2,1,1), mgp=c(1.5,0.5,0),
      pty = c("m"))

  ts.plot(x, xhat1, xhat2, col = c("black","red","blue"), ylim = c(-10, 10), lwd = c(1, 1, 1))
  legend("topleft",
    bty = "n", lwd = 3, cex = 1, lty = 1,
    # inset = c(-0.5, 0),
    legend = c("OBS", "Pred", "Pred(extrap=T)"),
    x.intersp = 0, xjust = 0, yjust = 0, text.width = c(0, 50, 50), horiz = T,
    col = c("black","red","blue"))

  par(mfrow = c(1, 1), pty = c("s"))
  plot(xhat1, xhat2, xlim = c(-10, 10), ylim = c(-10, 10))
  abline(coef = c(0, 1), lwd = 1, col = 2)

}
```


# Illustration of the usage of partial weights

```{r weights, fig.cap='Illustration of the usage of partial weights', fig.height=6, fig.width=9}
sample <- 500
k <- 0
u <- runif(sample, 0, 5 * pi)
z <- sin(u) + rnorm(sample, sd = 0.2)

u1 <- cbind(u, runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi))
# zhat1 <- knnregl1cv(x=z, z=u1, k=k)
zhat1 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, ], zout = u1[i, ], k = k))

sel <- stepwise.PIC(x = z, py = u1)
sel
# zhat2 <- knnregl1cv(x=z, z=u1[,sel$cpy], k=k)
zhat2 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, sel$cpy], zout = u1[i, sel$cpy], k = k))

if (TRUE) {
  par(mfrow = c(1, 1), pty = c("m"))
  
  plot(u, z, pch = 16)
  lines(sort(u), zhat1[order(u)], col = "green")
  lines(sort(u), zhat2[order(u)], col = "red")
  abline(a = 0, b = 0)
}
```

# Application to predicting drought anomalies

```{r exp, warning=FALSE, fig.cap='Application to predicting drought anomalies', fig.height=9, fig.width=7}
# An demo example used in Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2020)
# Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2021). A wavelet-based tool to modulate variance in predictors: An application to predicting drought anomalies. Environmental Modelling and Software, 135, 104907.

require(WASP)
#load response and predictor variables
data("SPI.12", package="WASP"); 
data("data.CI", package="WASP")
data("Ind_AWAP.2.5", package="WASP")

#study grids and period
Grid <- sample(Ind_AWAP.2.5,1)
Grid = 149 #Grid used in the SI
SPI.12.ts <- window(SPI.12, start=c(1910,1),end=c(2009,12))
data.CI.ts <- window(data.CI, start=c(1910,1),end=c(2009,12))
#partition into two folds
folds <- cut(seq(1,nrow(SPI.12.ts)),breaks=2,labels=FALSE)
sub.cali <- which(folds==1, arr.ind=TRUE); sub.vali <- which(folds==2, arr.ind=TRUE)
#-------------------------------------------------------------------------------------
###calibration and selection
data <- list(x=SPI.12.ts[sub.cali,Grid],dp=data.CI.ts[sub.cali,])

#variance transformation - calibration
modwt <- modwt.vt(data, wf="haar", J=8, boundary="periodic")

#stepwise PIC selection
sel <- NPRED::stepwise.PIC(modwt$x, modwt$dp.n)
#-------------------------------------------------------------------------------------
###validation and prediction
data.val <- list(x=SPI.12.ts[sub.vali,Grid],dp=data.CI.ts[sub.vali,])

#variance transformation - validation
modwt.val <- modwt.vt.val(data.val, J=8, modwt)

#knn prediction
cpy <- sel$cpy; wt <- sel$wt
x=data$x; z=modwt$dp.n[,cpy]; zout=modwt.val$dp.n[,cpy]
mod <- knn(x, z, zout, k=5, pw=wt, extrap=T)

#-------------------------------------------------------------------------------------
###plot
start.cal <- c(1910,1); start.val <- c(1960,1)
ndim = ncol(data.CI.ts); CI.names = colnames(data.CI.ts)
par(mfcol=c(ndim+1,2),mar=c(2,4,2,2),mgp=c(1.5,0.5,0),
    bg = "white",pty="m",cex.lab=1.5,ps=8)
#----------------------------------------------
#plot before and after vt - calibration
if(TRUE){

  x <- ts(modwt$x, start=start.cal, frequency = 12)
  dp <- ts(modwt$dp, start=start.cal, frequency = 12)
  dp.n <- ts(modwt$dp.n, start=start.cal, frequency = 12)

  ts.plot(x,xlab=NA, main=paste0("Sampled Grid: ", Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}
#----------------------------------------------
#plot before and after vt - validation
if(TRUE){

  x <- ts(modwt.val$x, start=start.val, frequency = 12)
  dp <- ts(modwt.val$dp, start=start.val, frequency = 12)
  dp.n <- ts(modwt.val$dp.n, start=start.val, frequency = 12)

  ts.plot(x, xlab=NA, main=paste0("Sampled Grid: ",Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}

```