## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----betatab, include=TRUE, echo=FALSE---------------------------------------- bn = c( paste0("psi_", 0), paste0("beta_", 1:7), paste0("psi_", 2), paste0("eta_", 1:7)) bv = c(0, c(0.8, 0.6, 0.3, -0.3, -0.3, -0.3, 0), 0, c(1.0, 0.0, 0.0, 0.0, 0.2, 0.2, 0.2)) dt = data.frame(value=bv, row.names = bn) print(dt) ## ----intro_gen, include=TRUE-------------------------------------------------- library(qgcompint) set.seed(42) dat1 <- simdata_quantized_emm( outcometype="continuous", # sample size n = 300, # correlation between x1 and x2, x3, ... corr=c(0.8, 0.6, 0.3, -0.3, -0.3, -0.3), # model intercept b0=0, # linear model coefficients for x1, x2, ... at referent level of interacting variable mainterms=c(0.3, -0.1, 0.1, 0.0, 0.3, 0.1, 0.1), # linear model coefficients for product terms between x1, x2, ... and interacting variable prodterms = c(1.0, 0.0, 0.0, 0.0, 0.2, 0.2, 0.2), # type of interacting variable ztype = "binary", # number of levels of exposure q = 4, # residual variance of y yscale = 2.0 ) names(dat1)[which(names(dat1)=="z")] = "M" ## data head(dat1) ## modifier table(dat1$M) ## outcome summary(dat1$y) ## exposure correlation cor(dat1[, paste0("x", 1:7)]) ## ----first_step_fit, include=TRUE--------------------------------------------- qfit1 <- qgcomp.emm.glm.noboot(y~x1+x2+x3+x4+x5+x6+x7, data = dat1, expnms = paste0("x", 1:7), emmvar = "M", q = 4) qfit1 ## ----first_step_bound, include=TRUE------------------------------------------- pointwisebound(qfit1, emmval=0) pointwisebound(qfit1, emmval=1) ## ----first_step_plot, include=TRUE, fig.width=5, fig.height=4, fig.cap=paste("Weights for M =", 0:1)---- plot(qfit1, emmval=0) plot(qfit1, emmval=1) ## ----first_step_boot, include=TRUE-------------------------------------------- qfit1b <- qgcomp.emm.boot(y~x1+x2+x3+x4+x5+x6+x7, data=dat1, expnms = paste0("x", 1:7), emmvar = "M", q = 4) qfit1b ## ----first_step_boot_plot, include=TRUE, fig.width=6, fig.height=4, fig.cap=paste("Pointwise comparisons for M =", 0:1)---- plot(qfit1b, emmval=0) plot(qfit1b, emmval=1) ## ----first_step_boot_plot_scale, include=TRUE, fig.width=6, fig.height=4, fig.cap=paste("Pointwise comparisons (same scale) for M =", 0:1)---- library(ggplot2) # need to explicitly call ggplot pp0b = plot(qfit1b, emmval=0, suppressprint=TRUE, geom_only=TRUE, pointwisebars=TRUE, modelfitline=FALSE, modelband=FALSE) pp1b = plot(qfit1b, emmval=1, suppressprint=TRUE, geom_only=TRUE, pointwisebars=TRUE, modelfitline=FALSE, modelband=FALSE) # relabel the plots by directly modifying the ggplot2 geometries. # "col" is a column that sets the color # "point" and "errorbar" are names given to ggplot2::geom_point and # ggplot2::geom_error bar geometries pp0b$point$data$col = "M=0" pp1b$point$data$col = "M=1" pp0b$errorbar$data$col = "M=0" pp1b$errorbar$data$col = "M=1" # jitter along the x axis so the points do not overlap pp0b$point$data$x = pp0b$point$data$x - 0.01 pp1b$point$data$x = pp1b$point$data$x + 0.01 pp0b$errorbar$data$x = pp0b$errorbar$data$x - 0.01 pp1b$errorbar$data$x = pp1b$errorbar$data$x + 0.01 suppressMessages(print( ggplot() + pp0b + pp1b + scale_colour_viridis_d(name="") + labs(title="Bootstrap approach") )) ## ----first_step_ee, include=TRUE---------------------------------------------- qfit1ee <- qgcomp.emm.glm.ee(y~x1+x2+x3+x4+x5+x6+x7, data=dat1, expnms = paste0("x", 1:7), emmvar = "M", q = 4) qfit1ee ## ----first_step_ee_plot, include=TRUE, fig.width=6, fig.height=4, fig.cap=paste("Pointwise comparisons for M =", 0:1)---- pp0ee = plot(qfit1b, emmval=0, suppressprint=TRUE, geom_only=TRUE, pointwisebars=TRUE, modelfitline=FALSE, modelband=FALSE) pp1ee = plot(qfit1b, emmval=1, suppressprint=TRUE, geom_only=TRUE, pointwisebars=TRUE, modelfitline=FALSE, modelband=FALSE) # relabel the plots by directly modifying the ggplot2 geometries. # "col" is a column that sets the color # "point" and "errorbar" are names given to ggplot2::geom_point and # ggplot2::geom_error bar geometries pp0ee$point$data$col = "M=0" pp1ee$point$data$col = "M=1" pp0ee$errorbar$data$col = "M=0" pp1ee$errorbar$data$col = "M=1" # jitter along the x axis so the points do not overlap pp0ee$point$data$x = pp0ee$point$data$x - 0.01 pp1ee$point$data$x = pp1ee$point$data$x + 0.01 pp0ee$errorbar$data$x = pp0ee$errorbar$data$x - 0.01 pp1ee$errorbar$data$x = pp1ee$errorbar$data$x + 0.01 suppressMessages(print( ggplot() + pp0ee + pp1ee + scale_colour_viridis_d(name="") + labs(title="Estimating equations approach") )) ## ----catmod_gen, include=TRUE------------------------------------------------- set.seed(23) dat2 <- simdata_quantized_emm( outcometype="logistic", # sample size n = 300, # correlation between x1 and x2, x3, ... corr=c(0.6, 0.5, 0.3, -0.3, -0.3, 0.0), # model intercept b0=-2, # linear model coefficients for x1, x2, ... at referent level of interacting variable mainterms=c(0.1, -0.1, 0.1, 0.0, 0.1, 0.1, 0.1), # linear model coefficients for product terms between x1, x2, ... and interacting variable prodterms = c(0.2, 0.0, 0.0, 0.0, 0.2, -0.2, 0.2), # type of interacting variable ztype = "categorical", # number of levels of exposure q = 4, # residual variance of y yscale = 2.0 ) ## data head(dat2) ## modifier table(dat2$z) ## outcome table(dat2$y) ## exposure correlation cor(dat2[, paste0("x", 1:7)]) ## ----cat_mod_fit_wrong, include=TRUE------------------------------------------ qfit.wrong <- qgcomp.emm.glm.noboot(y~x1+x2+x3+x4+x5+x6+x7, data = dat2, expnms = paste0("x", 1:7), emmvar = "z", q = 4, family=binomial()) qfit.wrong ## ----cat_mod_fit, include=TRUE------------------------------------------------ dat2$zfactor = as.factor(dat2$z) # using asymptotic-based confidence intervals qfit2 <- qgcomp.emm.glm.noboot(y~x1+x2+x3+x4+x5+x6+x7, data = dat2, expnms = paste0("x", 1:7), emmvar = "zfactor", q = 4, family=binomial()) # using bootstrap based confidence intervals (estimate a) set.seed(12312) qfit2b <- qgcomp.emm.boot(y~x1+x2+x3+x4+x5+x6+x7, data = dat2, expnms = paste0("x", 1:7), emmvar = "zfactor", B = 10, # set higher when using in real data q = 4, family=binomial(), rr = FALSE) # using estimating equation based confidence intervals (estimate ee) set.seed(12312) qfit2ee <- qgcomp.emm.ee(y~x1+x2+x3+x4+x5+x6+x7, data = dat2, expnms = paste0("x", 1:7), emmvar = "zfactor", q = 4, family=binomial(), rr = FALSE) qfit2 qfit2b qfit2ee ## ----cat_mod_bound, include=TRUE, fig.width=5, fig.height=4------------------- ## output the weights at Z=0 getstratweights(qfit2, emmval=0) ## output pointwise comparisons at Z=0 pointwisebound(qfit2, emmval=0) ## plot weights at Z=0 plot(qfit2, emmval=0) ## output stratum specific joint effect estimate for the mixture at Z=2 print(getstrateffects(qfit2, emmval=2)) ## output the weights at Z=2 print(getstratweights(qfit2, emmval=2)) ## output pointwise comparisons at Z=2 pointwisebound(qfit2, emmval=2) plot(qfit2, emmval=2) ## output stratum specific joint effect estimate for the mixture at Z=2 from bootstrapped fit print(getstrateffects(qfit2b, emmval=2)) ## output pointwise comparisons at Z=2 from bootstrapped fit print(pointwisebound(qfit2b, emmval=2)) ## output modelwise confidence bounds at Z=2 from bootstrapped fit print(modelbound(qfit2b, emmval=2)) ## Plot pointwise comparisons at Z=2 from bootstrapped fit plot(qfit2b, emmval=2) ## ----first_step_ee_plot_scalecat, include=TRUE, fig.width=6, fig.height=4, fig.cap=paste("Pointwise comparisons (same scale) for M =", 0:1)---- library(ggplot2) # need to explicitly call ggplot pp0ee = plot(qfit2ee, emmval=0, suppressprint=TRUE, geom_only=TRUE, modelband=TRUE, pointwisebars=FALSE, modelfitline=TRUE) pp1ee = plot(qfit2ee, emmval=1, suppressprint=TRUE, geom_only=TRUE, modelband=TRUE, pointwisebars=FALSE, modelfitline=TRUE) pp2ee = plot(qfit2ee, emmval=2, suppressprint=TRUE, geom_only=TRUE, modelband=TRUE, pointwisebars=FALSE, modelfitline=TRUE) # relabel the plots uisn pp0ee$line$data$col = "Z=0" pp1ee$line$data$col = "Z=1" pp2ee$line$data$col = "Z=2" pp0ee$ribbon$data$col = "Z=0" pp1ee$ribbon$data$col = "Z=1" pp2ee$ribbon$data$col = "Z=2" # jitter along the x axis so the points do not overlap pp0ee$line$data$x = pp0ee$line$data$x - 0.01 pp2ee$line$data$x = pp2ee$line$data$x + 0.01 pp0ee$ribbon$data$x = pp0ee$ribbon$data$x - 0.01 pp2ee$ribbon$data$x = pp2ee$ribbon$data$x + 0.01 ggplot() + pp0ee + pp1ee + pp2ee + scale_color_viridis_d(name="") + scale_fill_viridis_d(name="", alpha=0.25) ## ----cat_mod_boundtest, include=TRUE, fig.width=5, fig.height=4--------------- library("qgcomp") dat2$zfactor = as.factor(dat2$z) catfitoverall <- qgcomp.glm.noboot(y~x1+x2+x3+x4+x5+x6+x7, data = dat2, expnms = paste0("x", 1:7), q = 4, family=binomial()) catfitemm <- qgcomp.emm.glm.noboot(y~x1+x2+x3+x4+x5+x6+x7, data = dat2, expnms = paste0("x", 1:7), emmvar = "zfactor", q = 4, family=binomial()) catfitoverallee <- qgcomp.glm.ee(y~x1+x2+x3+x4+x5+x6+x7, data = dat2, expnms = paste0("x", 1:7), q = 4, family=binomial()) catfitemmee <- qgcomp.emm.glm.ee(y~x1+x2+x3+x4+x5+x6+x7, data = dat2, expnms = paste0("x", 1:7), emmvar = "zfactor", q = 4, family=binomial(), rr=FALSE) anova(catfitoverall$fit, catfitemm$fit, test = "Chisq") anova(catfitemmee, catfitoverallee) ## ----catmod_gen_letter, include=TRUE------------------------------------------ set.seed(23) dat3 <- simdata_quantized_emm( outcometype="continuous", n = 300, corr=c(0.6, 0.5, 0.3, -0.3, -0.3, 0.0), b0=-2, mainterms=c(0.1, -0.1, 0.1, 0.0, 0.1, 0.1, 0.1), prodterms = c(0.2, 0.0, 0.0, 0.0, 0.2, -0.2, 0.2), ztype = "categorical", q = 4, yscale = 2.0 ) dat3$zletter = as.factor(with(dat3, ifelse(z==0, "a", ifelse(z==1, "b", "c")))) with(dat3, table(z, zletter)) qfit_letter <- qgcomp.emm.glm.noboot(y~x1+x2+x3+x4+x5+x6+x7, data = dat3, expnms = paste0("x", 1:7), emmvar = "zletter", q = 4, family=gaussian()) qfit_letteree <- qgcomp.emm.glm.ee(y~x1+x2+x3+x4+x5+x6+x7, data = dat3, expnms = paste0("x", 1:7), emmvar = "zletter", q = 4, family=gaussian()) print(qfit_letter) print(qfit_letteree) ## output stratum specific joint effect estimate for the mixture at Zletter="a" print(getstrateffects(qfit_letter, emmval="a")) print(getstrateffects(qfit_letteree, emmval="a")) ## output the weights at Z=2 print(getstratweights(qfit_letter, emmval="a")) ## output predictions at Zletter="b" print(pointwisebound(qfit_letter, emmval="b", pointwiseref = 1, alpha=0.05)) print(pointwisebound(qfit_letteree, emmval="b", pointwiseref = 1, alpha=0.05)) ## ----contmod_gen, include=TRUE------------------------------------------------ set.seed(23) dat3 <- simdata_quantized_emm( outcometype="continuous", # sample size n = 100, # correlation between x1 and x2, x3, ... corr=c(0.8, 0.6, 0.3, -0.3, -0.3, -0.3), # model intercept b0=-2, # linear model coefficients for x1, x2, ... at referent level of interacting variable mainterms=c(0.3, -0.1, 0.1, 0.0, 0.3, 0.1, 0.1), # linear model coefficients for product terms between x1, x2, ... and interacting variable prodterms = c(1.0, 0.0, 0.0, 0.0, 0.2, 0.2, 0.2), # type of interacting variable ztype = "continuous", # number of levels of exposure q = 4, # residual variance of y yscale = 2.0 ) names(dat3)[which(names(dat3)=="z")] = "CoM" head(dat3) summary(dat3$CoM) summary(dat3$y) cor(dat3[, paste0("x", 1:7)]) ## ----cont_mod_fit, include=TRUE----------------------------------------------- qfit3 <- qgcomp.emm.glm.noboot(y~x1+x2+x3+x4+x5+x6+x7, data = dat3, expnms = paste0("x", 1:7), emmvar = "CoM", q = 4) qfit3 qfit3b <- qgcomp.emm.boot(y~x1+x2+x3+x4+x5+x6+x7, data = dat3, expnms = paste0("x", 1:7), emmvar = "CoM", B=10, q = 4) qfit3b qfit3ee <- qgcomp.emm.ee(y~x1+x2+x3+x4+x5+x6+x7, data = dat3, expnms = paste0("x", 1:7), emmvar = "CoM", q = 4) qfit3ee ## ----cont_mod_bound, include=TRUE, fig.width=5, fig.height=4------------------ ## output/plot the weights at CoM=0") getstratweights(qfit3, emmval=0) plot(qfit3, emmval=0) ## output stratum specific joint effect estimate for the mixture at CoM=0 print(getstrateffects(qfit3, emmval=0)) ## output pointwise comparisons at CoM=0 print(pointwisebound(qfit3, emmval=0)) ## output/plot the weights at the 80%ile of CoM getstratweights(qfit3, emmval=quantile(dat3$CoM, .8)) plot(qfit3, emmval=quantile(dat3$CoM, .8)) ## output stratum specific joint effect estimate for the mixture at the 80%ile of CoM print(getstrateffects(qfit3, emmval=quantile(dat3$CoM, .8))) ## output pointwise comparisons at at the 80%ile of CoM print(pointwisebound(qfit3, emmval=quantile(dat3$CoM, .8), pointwiseref = 2)) ## ----cont_mod_bound_boot, include=TRUE, fig.width=5, fig.height=4------------- ## output stratum specific joint effect estimate for the mixture at CoM=0 #print(getstrateffects(qfit3b, emmval=0)) # commmented out to reduce output print(getstrateffects(qfit3ee, emmval=0)) ## output pointwise comparisons at CoM=0 #print(pointwisebound(qfit3b, emmval=0)) # commmented out to reduce output print(pointwisebound(qfit3ee, emmval=0)) ## output stratum specific joint effect estimate for the mixture at the 80%ile of CoM #print(getstrateffects(qfit3b, emmval=quantile(dat3$CoM, .8))) # commmented out to reduce output print(getstrateffects(qfit3ee, emmval=quantile(dat3$CoM, .8))) ## output pointwise comparisons at at the 80%ile of CoM #print(pointwisebound(qfit3b, emmval=quantile(dat3$CoM, .8))) # commmented out to reduce output print(pointwisebound(qfit3ee, emmval=quantile(dat3$CoM, .8))) ## plot the pointwise effects at CoM=0 (compare bootstrap with estimating equations approach) ## note that the bootstrapped version is not valid because of only using 10 bootstrap iterations ppb <- plot(qfit3b, emmval=0, suppressprint = TRUE, geom_only = TRUE) ppee <- plot(qfit3ee, emmval=0, suppressprint = TRUE, geom_only = TRUE) ppb$point$data$col = "Bootstrap" ppb$errorbar$data$col = "Bootstrap" ppee$point$data$col = "EE" ppee$errorbar$data$col = "EE" ppee$point$data$x = ppee$point$data$x + 0.01 ppee$errorbar$data$x = ppee$errorbar$data$x + 0.01 ggplot() + ppb + ppee + scale_color_grey(name="Pointwise estimates, 95% CI") ## ----cont_mod_nlfit, include=TRUE--------------------------------------------- qfit3bnl <- qgcomp.emm.glm.boot(y~x1+x2+x3+x4+x5+x6+x7 + x1*(x2 + x3 + x4 + x5 + x6 +x7), data = dat3, expnms = paste0("x", 1:7), emmvar = "CoM", B = 10, # set higher in practice q = 8, degree= 2) qfit3bnlee <- qgcomp.emm.glm.ee(y~x1+x2+x3+x4+x5+x6+x7 + x1*(x2 + x3 + x4 + x5 + x6 +x7), data = dat3, expnms = paste0("x", 1:7), emmvar = "CoM", q = 8, degree= 2) qfit3bnl qfit3bnlee ## ----cont_mod_nl_plot_base, include=TRUE, fig.width=6, fig.height=4----------- print(pointwisebound(qfit3bnlee, emmval=-1)) print(pointwisebound(qfit3bnlee, emmval=-1)) print(pointwisebound(qfit3bnlee, emmval=1)) plot(qfit3bnlee, emmval=-1, modelband=TRUE, pointwiseref=4) plot(qfit3bnlee, emmval=1, modelband=TRUE, pointwiseref=4) ## ----cont_mod_nl_plot, include=TRUE, fig.width=6, fig.height=4---------------- library(ggplot2) plotdata = data=data.frame(q=qfit3bnlee$index, ey=qfit3bnlee$y.expected, modifier=qfit3bnlee$emmvar.msm) ggplot() + geom_point(aes(x=q, y=ey, color=modifier), data=plotdata) + geom_point(aes(x=q, y=ey), color="purple", data=plotdata[plotdata$modifier>1, ], pch=1, cex=3) + geom_smooth(aes(x=q, y=ey), se=FALSE, color="purple", data=plotdata[plotdata$modifier>1, ], method = 'loess', formula='y ~ x') + geom_smooth(aes(x=q, y=ey), se=FALSE, color="red", data=plotdata[plotdata$modifier < -1, ], method = 'loess', formula='y ~ x') + geom_point(aes(x=q, y=ey), color="red", data=plotdata[plotdata$modifier < -1, ], pch=1, cex=3) + theme_classic() + labs(y="Expected outcome", x="Quantile score value (0 to q-1)") + scale_color_continuous(name="Value\nof\nmodifier") ## ----survmod_gen, include=TRUE------------------------------------------------ set.seed(23) dat4 <- simdata_quantized_emm( outcometype="survival", # sample size n = 200, # correlation between x1 and x2, x3, ... corr=c(0.8, 0.6, 0.3, -0.3, -0.3, -0.3), # model intercept b0=-2, # linear model coefficients for x1, x2, ... at referent level of interacting variable mainterms=c(0.0, -0.1, 0.1, 0.0, 0.3, 0.1, 0.1), # linear model coefficients for product terms between x1, x2, ... and interacting variable prodterms = c(0.1, 0.0, 0.0, 0.0, -0.2, -0.2, -0.2), # type of interacting variable ztype = "categorical", # number of levels of exposure q = 4, # residual variance of y yscale = 2.0 ) dat4$zfactor = as.factor(dat4$z) head(dat4) summary(dat4$zfactor) summary(dat4$time) table(dat4$d) # 30 censored cor(dat4[, paste0("x", 1:7)]) ## ----survival, include=TRUE--------------------------------------------------- qfit4 <- qgcomp.emm.cox.noboot(survival::Surv(time, d)~x1+x2+x3+x4+x5+x6+x7, data = dat4, expnms = paste0("x", 1:7), emmvar = "zfactor", q = 4) qfit4 plot(qfit4, emmval=0) getstratweights(qfit4, emmval=2) getstrateffects(qfit4, emmval=2) pointwisebound(qfit4, emmval=1)