## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ---- echo = TRUE, eval = FALSE, fig.width = 7, fig.height = 4----------------
#  library(dplyr)
#  ### Simulate Data
#  Nobs = 2000
#  DAT = MASS::mvrnorm(n = Nobs, mu = c(0,0,0),
#                      Sigma = rbind(c(1,    0.18, 0.42),
#                                    c(0.18, 0.09, 0.12),
#                                    c(0.42, 0.12, 0.49)))
#  Y = DAT[,1]
#  B = DAT[,2]
#  X = DAT[,3]
#  S = sample(x = c(0,1), size = Nobs, prob = c(0.5,0.5), replace = TRUE)
#  complete_cases = data.frame(Y, X, B, S)[S == 1,] # complete-case subjects only
#  observed_data = data.frame(Y, X, B, S)           # data with missingness in B
#  observed_data[S == 0, 'B'] = NA
#
#  ### Step 1: Impute B|X,Y
#  imputes = mice::mice(observed_data, m = 50, method = "norm", printFlag = FALSE, maxit = 1)
#  pred = imputes$predictorMatrix
#  pred[pred != 0] = 0
#  pred["B","X"] = 1
#  pred["B","Y"] = 1
#  imputes = mice::mice(observed_data, m = 50, predictorMatrix = pred, method = "norm", printFlag = FALSE)
#
#  ### Step 2: Stack imputed datasets
#  stack = mice::complete(imputes, action = "long", include = FALSE)
#
#  ### Step 3: Obtain weights (each subject's M imputed rows get equal weight 1/M)
#  stack$wt = 1
#  stack = as.data.frame(stack %>% group_by(.id) %>% mutate(wt = wt / sum(wt)))
#
#  ### Step 4: Point estimation
#  fit = glm(Y ~ X + B, data = stack, family = gaussian(), weights = stack$wt)
#
#  ### Step 5a: Variance estimation option 1 (for glm and coxph models only)
#  Info = StackImpute::Louis_Information(fit, stack, M = 50)
#  VARIANCE = diag(solve(Info))
#
#  ### Step 5b: Variance estimation using custom score and covariance matrices
#  ### (any model with a corresponding likelihood)
#  covariates = as.matrix(cbind(1, stack$X, stack$B))
#  score = sweep(covariates, 1, stack$Y - covariates %*% matrix(coef(fit)), '*') /
#    StackImpute::glm.weighted.dispersion(fit)
#  covariance_weighted = summary(fit)$cov.unscaled * StackImpute::glm.weighted.dispersion(fit)
#  Info = StackImpute::Louis_Information_Custom(score, covariance_weighted, stack, M = 50)
#  VARIANCE_custom = diag(solve(Info))
#
#  ### Step 5c: Variance estimation using bootstrap (any model with a vcov method)
#  bootcovar = StackImpute::Bootstrap_Variance(fit, stack, M = 50, n_boot = 100)
#  VARIANCE_boot = diag(bootcovar)
#
#  ### Step 5d: Variance estimation using jackknife (any model with a vcov method)
#  jackcovar = StackImpute::Jackknife_Variance(fit, stack, M = 50)
#  VARIANCE_jack = diag(jackcovar)

## ---- echo = TRUE, eval = FALSE, fig.width = 7, fig.height = 4----------------
#  ### Step 1: Impute B|X
#  imputes = mice::mice(observed_data, m = 50, method = "norm", printFlag = FALSE, maxit = 1)
#  pred = imputes$predictorMatrix
#  pred[pred != 0] = 0
#  pred["B","X"] = 1
#  imputes = mice::mice(observed_data, m = 50, predictorMatrix = pred, method = "norm", printFlag = FALSE)
#
#  ### Step 2: Stack imputed datasets
#  stack = mice::complete(imputes, action = "long", include = FALSE)
#
#  ### Step 3: Obtain weights proportional to the estimated density f(Y|X,B),
#  ### with the outcome model fit to the complete cases
#  fit_cc = glm(Y ~ X + B, family = gaussian(), data = complete_cases)
#  stack$wt = dnorm(stack$Y, mean = predict(fit_cc, newdata = stack),
#                   sd = sqrt(summary(fit_cc)$dispersion))
#  stack = as.data.frame(stack %>% group_by(.id) %>% mutate(wt = wt / sum(wt)))
#
#  ### Step 4: Point estimation
#  fit = glm(Y ~ X + B, data = stack, family = gaussian(), weights = stack$wt)
#
#  ### Any one of the above variance estimation strategies can then be applied.
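#
#  ### Sketch (not from the original vignette): one way to apply the Step 5a
#  ### Louis information estimator to this weighted fit and tabulate estimates
#  ### with standard errors. It assumes `fit` and `stack` from the chunk above
#  ### and M = 50 imputations.
#  Info = StackImpute::Louis_Information(fit, stack, M = 50)
#  VARIANCE = diag(solve(Info))
#  data.frame(estimate = coef(fit), std.error = sqrt(VARIANCE))
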
## ---- echo = TRUE, eval = FALSE, fig.width = 7, fig.height = 4----------------
#  ### Step 2: Stack imputed datasets, keeping only one copy of each complete case
#  stack = mice::complete(imputes, action = "long", include = FALSE)
#  cc = unique(stack$.id[stack$S == 1])
#  stack_short = rbind(stack[stack$S == 0,],
#                      stack[stack$S == 1 & !duplicated(stack$.id),])

## ---- echo = TRUE, eval = FALSE, fig.width = 7, fig.height = 4----------------
#  ### Simulate Data (B missing not at random: missingness depends on B and Y)
#  prob_obs = exp(2*B + 1*Y)/(1 + exp(2*B + 1*Y))
#  S_mnar = as.numeric(prob_obs > runif(Nobs, 0, 1))
#  complete_cases = data.frame(Y, X, B, S = S_mnar)[S_mnar == 1,] # complete-case subjects only
#  observed_data_mnar = data.frame(Y, X, B, S = S_mnar)           # data with missingness in B
#  observed_data_mnar[S_mnar == 0, 'B'] = NA
#
#  ### Step 1: Impute B|X,Y
#  imputes_mnar = mice::mice(observed_data_mnar, m = 50, method = "norm", printFlag = FALSE, maxit = 1)
#  pred = imputes_mnar$predictorMatrix
#  pred[pred != 0] = 0
#  pred["B","X"] = 1
#  pred["B","Y"] = 1
#  imputes_mnar = mice::mice(observed_data_mnar, m = 50, predictorMatrix = pred, method = "norm", printFlag = FALSE)
#
#  ### Step 2: Stack imputed datasets
#  stack = mice::complete(imputes_mnar, action = "long", include = FALSE)
#
#  ### Step 3: Obtain weights (phi1_assumed is the assumed coefficient for B in the
#  ### selection model for missingness; here it matches the value used to simulate S_mnar)
#  phi1_assumed = 2
#  stack$wt = exp(-phi1_assumed*stack$B)
#  stack = as.data.frame(stack %>% group_by(.id) %>% mutate(wt = wt / sum(wt)))
#
#  ### Step 4: Point estimation
#  fit = glm(Y ~ X + B, data = stack, family = gaussian(), weights = stack$wt)
#
#  ### Any one of the above variance estimation strategies can then be applied.

## ---- echo = TRUE, eval = FALSE, fig.width = 7, fig.height = 4----------------
#  ### Imputation Function (modified version of mice::mice.impute.mnar.norm())
#  mice.impute.mnar.norm2 = function(y, ry, x, wy = NULL, ums = NULL, umx = NULL, ...) {
#    u <- mice:::parse.ums(x, ums = ums, umx = umx, ...)
#    if (is.null(wy))
#      wy <- !ry
#    x <- cbind(1, as.matrix(x))
#    parm <- mice:::.norm.draw(y, ry, x, ...)
#    return(x[wy, ] %*% parm$beta + as.matrix(u$x[wy, ]) %*% as.matrix(u$delta) + rnorm(sum(wy)) * parm$sigma)
#  }
#
#  ### *Ideal* pattern mixture model offset parameter for these simulated data:
#  delta1_assumed = -0.087
#
#  ### Step 1: Impute B|X,Y,S
#  mnar.blot <- list(B = list(ums = paste0('-', abs(delta1_assumed))))
#  imputes_pmm = mice::mice(observed_data_mnar, m = 50, method = "mnar.norm2", printFlag = FALSE, maxit = 1, blots = mnar.blot)
#  pred = imputes_pmm$predictorMatrix
#  pred[pred != 0] = 0
#  pred["B","X"] = 1
#  pred["B","Y"] = 1
#  imputes_pmm = mice::mice(observed_data_mnar, m = 50, predictorMatrix = pred, method = "mnar.norm2", printFlag = FALSE, blots = mnar.blot)
#
#  ### Step 2: Apply Rubin's Rules to obtain point estimates and standard errors
#  fit = summary(mice::pool(with(imputes_pmm, glm(Y ~ X + B, family = gaussian()))))
#  param = fit$estimate
#  VARIANCE = (fit$std.error)^2
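#
#  ### Sketch (not from the original vignette): normal-approximation 95% confidence
#  ### intervals from the pooled estimates and variances above. It assumes `param`
#  ### and `VARIANCE` from the chunk above; Rubin's Rules would more precisely use a
#  ### t reference distribution, so treat this as a rough summary.
#  std_err = sqrt(VARIANCE)
#  data.frame(estimate = param,
#             lower = param - qnorm(0.975)*std_err,
#             upper = param + qnorm(0.975)*std_err)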