## ----include = FALSE, warning=FALSE, message=FALSE---------------------------- knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, message = FALSE, fig.width=4, fig.height=4, fig.align='center', comment = "#>" ) ## ----eval=FALSE--------------------------------------------------------------- # install.packages('BSTZINB') ## ----setup-------------------------------------------------------------------- library(BSTZINB) ## ----------------------------------------------------------------------------- data(county.adjacency) data(USAcities) library(dplyr) library(gt) library(gtsummary) IAcities <- USAcities %>% filter(state_id=="IA") countyname <- unique(IAcities$county_name) A <- get_adj_mat(county.adjacency,countyname,c("IA")) m <- apply(A,1,sum) ## ----------------------------------------------------------------------------- set.seed(091017) n <- nrow(A) # Number of spatial units nis <- rep(16,n) # Number of individuals per county; here it's balanced -- 12 per county per year # Note: may need to lower proposal variance, s, below as n_i increases sid <- rep(1:n,nis) tid <- rep(1:nis[1],n) N <- length(sid) # Total number of observations ## ----------------------------------------------------------------------------- library(spam) # library(mvtnorm) kappa <- 0.99 # Spatial dependence parameter ~ 1 for intrinsic CAR covm <- matrix(c(.5,.10,.10,-.10, .10,.15,.10,.10, .10,.10,.5,.10, -.10,.10,.10,.15),4,4)# Conditional Cov of phi1 and phi2 given phi_{-i} Q <- as.spam(diag(m)-kappa*A) covphi <- solve(Q)%x%covm # 3n x 3n covariance of phis phi <- rmvnorm(1,sigma=covphi) # 3n vector of spatial effects phitmp <- matrix(phi,ncol=4,byrow=T) # n x 3 matrix of spatial effects true.phi1 <- phi1 <- phitmp[,1]-mean(phitmp[,1]) # n x 1 phi1 vector -- Centered true.phi2 <- phi2 <- phitmp[,2]-mean(phitmp[,2]) # n x 1 phi2 vector, etc. true.phi3 <- phi3 <- phitmp[,3]-mean(phitmp[,3]) true.phi4 <- phi4 <- phitmp[,4]-mean(phitmp[,4]) Phi1 <- rep(phi1,nis) Phi2 <- rep(phi2,nis) Phi3 <- rep(phi3,nis) Phi4 <- rep(phi4,nis) true.phi <- cbind(true.phi1,true.phi2,true.phi3,true.phi4) ## ----------------------------------------------------------------------------- library(boot) tpop <- USAcities %>% filter(state_id=="IA") %>% group_by(county_fips) %>% summarise(tpop=population) %>% .[,2] %>% unlist %>% as.numeric x <- rep(log(tpop),nis) x <- (x-mean(x))/sd(x) # set.seed(2023); x = rnorm(N) t <- tid / max(tid) X <- cbind(1,x) # Design matrix, can add additional covariates (e.g., race, age, gender) Xtilde <- cbind(1,x,sin(t)) p <- ncol(Xtilde) ## ----------------------------------------------------------------------------- # Binomial part true.alpha <- rep(0.1,p) true.alpha[1:2] <- c(-0.25,0.25) # true.alpha <- c(-0.25,0.25,-0.5,0.25) eta1 <- Xtilde%*%true.alpha+Phi1+Phi2*t pii <- inv.logit(eta1) # 1-pr("structural zero") u <- rbinom(N,1,pii) # at-risk indicator N1 <- sum(u) pstruct0 <- 1-mean(u) # Proportion of structural zeros # Count Part true.beta <- rep(0.1,p) true.beta[1:2] <- c(.5,-.25) tmp <- u tmp[u==0] <- 0 # If in structural group then set tmp to 0 nis1 <- tapply(tmp,sid,sum) # Number of at risk observations in each county eta2 <- Xtilde[u==1,]%*%true.beta+Phi3[u==1]+Phi4[u==1]*t[u==1] # Linear predictor for count part true.r <- 1.25 # NB dispersion psi <- exp(eta2)/(1+exp(eta2)) # Prob of success mu <- true.r*psi/(1-psi) # NB mean ## ----------------------------------------------------------------------------- y <- rep(0,N) # Response y[u==1] <- rnbinom(N1,size=true.r,mu=mu)# If at risk, draw from NB pzero <- length(y[y==0])/N # Proportion of zeros # pzero # pstruct0/pzero # table(y) # mean(mu) # quantile(tapply(u,id,sum)) # Number of at-risk observations per county *for all 5 years* # quantile(y) this_dat <- data.frame(sid,tid,y,X) head(this_dat) tbl_summary(this_dat) %>% modify_header(label = "**Coefficients**") ## ----------------------------------------------------------------------------- USDmapCount(state.sel = c("IA"), dat = this_dat, scol = 1, tcol = 2, cname = countyname, uplim=150) ## ----------------------------------------------------------------------------- # blue histogram tmp <- table(y)/N*100 # convert to %s (divide by N multiply by 100) oldpar <- par(no.readonly = TRUE) par(mar=c(3,3,1,1)) barplot(tmp, ylab="Percent",xlab="Count",col="lightblue") # sphagetti plot library(CorrMixed) # Plot individual profiles + mean Spaghetti.Plot(Dataset = this_dat, Outcome = y, Id=sid, Time = tid) par(oldpar) # Moran's I library(ape) library(reshape) this_dat2 <- cast(this_dat,sid~tid,sum,value="y") Moran.I(rowMeans(this_dat2),A) this_dat2 %>% apply(2,function(w) Moran.I(w,A)) %>% lapply(function(list) list$p.value) %>% unlist Moran.I(rowMeans(this_dat2),A) ## ----------------------------------------------------------------------------- USDmapCount(state.sel = c("IA"), dat = this_dat, scol = 1, tcol = 2, tsel = 3, cname = countyname) ## Timepoint 3 USDmapCount(state.sel = c("IA"), dat = this_dat, scol = 1, tcol = 2, tsel = 6, cname = countyname) ## Timepoint 6 USDmapCount(state.sel = c("IA"), dat = this_dat, scol = 1, tcol = 2, tsel = 9, cname = countyname) ## Timepoint 9 USDmapCount(state.sel = c("IA"), dat = this_dat, scol = 1, tcol = 2, tsel = 12, cname = countyname) ## Timepoint 12 ## ----------------------------------------------------------------------------- res0 <- BNB(y, X, A, nchain=2, niter=50, nburn=10) glimpse(res0) res1 <- BZINB(y, X, A, nchain=2, niter=50, nburn=10) glimpse(res1) apply(res0$Beta,2,mean) apply(res1$Beta,2,mean) ## ----------------------------------------------------------------------------- res2 <- BSTNB(y, X, A, nchain=2, niter=50, nburn=10) glimpse(res2) ## ----fig.width=6,fig.height=3,fig.align='center'------------------------------ res3 <- BSTZINB(y, X, A, LinearT=TRUE, nchain=2, niter=50, nburn=10, nthin=1) glimpse(res3) conv.test(res3$Alpha) ## Convergence test for Alpha parameters conv.test(res3$Beta) ## Convergence test for Beta parameters conv.test(res3$R) ## Convergence test for R parameters ## Plotting the chains for the parameters oldpar <- par(no.readonly = TRUE) par(mfrow=c(1,2));plot(res3$R[,1],main="R");plot(res3$R[,2],main="R") par(mfrow=c(1,2));plot(res3$Alpha[,1,1],type='o',main="Alpha");plot(res3$Alpha[,1,2],type='o',main="Alpha") par(oldpar) ## Checking the estimated beta with the true beta apply(res3$Beta,2,mean) true.beta ## Computing DIC compute_ZINB_DIC(y,res3,dim(res3$Beta)[1],2) ## ----fig.width=6,fig.height=3,fig.align='center'------------------------------ res4 <- BSTZINB(y, X, A, LinearT=FALSE, nchain=2, niter=50, nburn=10) glimpse(res4) ## Convergence tests conv.test(res4$Alpha) ## Alpha parameters conv.test(res4$Beta) ## Beta parameters conv.test(res4$R) ## R parameters oldpar <- par(no.readonly = TRUE) par(mfrow=c(1,2));plot(res4$R[,1],main="R");plot(res4$R[,2],main="R") par(mfrow=c(1,2));plot(res4$Alpha[,1,1],type='o',main="Alpha");plot(res4$Alpha[,1,2],type='o',main="Alpha") par(oldpar) ## Posterior means for beta and R apply(res4$Beta,2,mean) mean(res4$R) ## DIC compute_ZINB_DIC(y,res4,dim(res4$Beta)[1],2) ## ----------------------------------------------------------------------------- ResultTableSummary(res3) ## ----------------------------------------------------------------------------- ResultTableSummary2(y, X, A, nchain=2, niter=50, nburn=10, nthin=1) ## ----------------------------------------------------------------------------- # Map Visualization : Time-averaged Eta this_result_dat <- this_dat this_result_dat$y <- apply(res4$Eta1,2,mean) this_result_dat %>% head USDmapCount(state.sel = c("IA"), dat = this_result_dat, scol = 1, cname = countyname) ## ----------------------------------------------------------------------------- # Map Visualization : Spatio-temporal Random Effect of Eta this_result_dat <- this_dat Phi1 <- rep(apply(res4$PHI1,2,mean),nis) Phi2 <- rep(apply(res4$PHI2,2,mean),nis) tt <- this_dat$tid / max(this_dat$tid) this_result_dat$y <- Phi1+Phi2*tt USDmapCount(state.sel = c("IA"), dat = this_result_dat, scol = 1, tcol = 2, cname = countyname) # Map Visualization : Temporal Fixed Effect of Eta this_result_dat <- this_dat Phi1 <- rep(apply(res4$PHI1,2,mean),nis) Phi2 <- rep(apply(res4$PHI2,2,mean),nis) tt <- this_dat$tid / max(this_dat$tid) this_result_dat$y <- apply(res4$Eta1,2,mean) - Phi1+Phi2*tt USDmapCount(state.sel = c("IA"), dat = this_result_dat, scol = 1, tcol = 2, cname = countyname) ## ----------------------------------------------------------------------------- # Map Visualization : Time-averaged probability at risk of disease this_result_dat <- this_dat this_result_dat$y <- inv.logit(apply(res4$Eta1,2,mean)) this_result_dat %>% head USDmapCount(state.sel = c("IA"), dat = this_result_dat, scol = 1, tcol = 2, cname = countyname) # Map Visualization : Time-specified probability at risk of disease this_result_dat <- this_dat this_result_dat$y <- inv.logit(apply(res4$Eta1,2,mean)) this_result_dat %>% head USDmapCount(state.sel = c("IA"), dat = this_result_dat, scol = 1, tcol = 2, tsel = 1, cname = countyname) ## ----------------------------------------------------------------------------- # Bar Plot of vn-quantile Probability at Risk of disease qRankPar(state.set=c("IA"),cname=countyname,bstfit=res3,vn=12, cex.title=18, cex.lab=12, cex.legend=12) # Bar Plot of Top vn number of Probability at Risk of disease qRankParTop(state.set=c("IA"),cname=countyname,bstfit=res3,vn=12, cex.title=18, cex.lab=12, cex.legend=12) ## ----------------------------------------------------------------------------- # Line Plot of Nonlinear Time effect abundance TimetrendCurve(res4,cname=countyname,vn=3,smooth.mode=FALSE, cex.title=18, cex.lab=12, cex.legend=12) # Line Plot of Nonlinear Time effect abundance (smoothed version) TimetrendCurve(res3,cname=countyname,vn=5,smooth.mode=TRUE, cex.title=18, cex.lab=12, cex.legend=12)