%\VignetteEngine{knitr::knitr} %\VignetteIndexEntry{Examples in statistical ecology} %\VignetteDepends{tramME, gamm4, gamlss.dist, survival, mgcv, glmmTMB, xtable, multcomp} \documentclass[11pt]{article} \usepackage{graphicx} \usepackage[margin=0.8in]{geometry} \usepackage[margin=0.5in, font=small]{caption} \usepackage{amsfonts,amstext,amsmath,amssymb,amsthm} \usepackage[utf8]{inputenc} \usepackage[comma,authoryear]{natbib} \usepackage{xcolor} \usepackage[colorlinks=true, urlcolor=black, linkcolor=blue, citecolor=blue]{hyperref} \usepackage{booktabs} \usepackage{doi} \usepackage[inline]{enumitem} \renewcommand{\tt}[1]{\texttt{#1}} \newcommand{\todo}[1]{\textbf{\color{red} TODO: #1}} \newcommand{\note}[1]{\textbf{\color{orange} NOTE: #1}} \newcommand{\code}[1]{\texttt{#1}} \newcommand{\pkg}[1]{\texttt{#1}} \newcommand{\CRANpkg}[1]{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}% % Math notation \newcommand{\IP}{{\mathbb{P}}} \newcommand{\IR}{{\mathbb{R}}} \newcommand{\IE}{{\mathbb{E}}} \newcommand{\U}{{\mathbf{U}}} \newcommand{\X}{{\boldsymbol{X}}} \newcommand{\bH}{{\boldsymbol{H}}} \newcommand{\bS}{{\boldsymbol{S}}} \newcommand{\x}{{\boldsymbol{x}}} \newcommand{\Y}{{\boldsymbol{Y}}} \newcommand{\y}{{\boldsymbol{y}}} \newcommand{\z}{{\boldsymbol{z}}} % \newcommand{\bcB}{{\boldsymbol{\mathcal{B}}}} % \newcommand{\bcb}{{\boldsymbol{\mathcal{b}}}} \newcommand{\ba}{{\boldsymbol{a}}} \newcommand{\bb}{{\boldsymbol{b}}} \newcommand{\bbeta}{{\boldsymbol{\beta}}} \newcommand{\bgamma}{{\boldsymbol{\gamma}}} \newcommand{\bdelta}{{\boldsymbol{\delta}}} \newcommand{\blambda}{{\boldsymbol{\lambda}}} \newcommand{\bvartheta}{{\boldsymbol{\vartheta}}} \newcommand{\bGamma}{{\boldsymbol{\Gamma}}} \newcommand{\bSigma}{{\boldsymbol{\Sigma}}{}} \newcommand{\vect}[1]{\boldsymbol{#1}} \newcommand{\cN}{{\mathcal{N}}} \newcommand{\cL}{{\mathcal{L}}} \newcommand{\0}{{\mathbf{0}}} \newcommand{\T}{^{\top}} \renewcommand{\d}{\mathsf{\,d}} \usepackage{accents} \newcommand{\ubar}[1]{\underaccent{\bar}{#1}} %%%%% Section titles, table, figure, equation labels %% Sections with A, B, C etc \let\thesectionWithoutS\thesection %% <- store old definition \renewcommand\thesection{S\thesectionWithoutS} %% equation, figure, table references prepended with S \let\theequationWithoutS\theequation %% <- store old definition \renewcommand\theequation{S\theequationWithoutS} \let\thefigureWithoutS\thefigure %% <- store old definition \renewcommand\thefigure{S\thefigureWithoutS} \let\thetableWithoutS\thetable %% <- store old definition \renewcommand\thetable{S\thetableWithoutS} %%%%%% Column header formatting in xtables %% https://stackoverflow.com/a/33237779/10826854 \usepackage{array} \newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}p{#1}} \setlength{\parskip}{0.5em} \renewcommand{\baselinestretch}{1.2} % \newcommand{\address}[1]{ % \addvspace{\baselineskip}\noindent % \textbf{Affiliations:} \\ % \emph{#1}} % \newcommand{\email}[1]{\href{mailto:#1}{\normalfont\texttt{#1}}} <>= knitr::opts_chunk$set(size = "small", prompt = TRUE, comment = NA, out.width=".9\\linewidth") knitr::knit_hooks$set( document = function(x) {sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)} ) oldpar <- par(no.readonly = TRUE) ## NOTE: for setting back at the end oldopt <- options() options(prompt = "R> ", continue = "+ ") options(width = 80, digits = 3) ## Dependencies library("tramME") library("survival") library("mgcv") library("glmmTMB") library("xtable") library("gamm4") @ \title{\textbf{Online 
Appendix:}\\ Flexible Regression for Correlated and Censored Ecological Data
with Mixed-Effects Additive Transformation Models }
\author{B\'alint Tam\'asi \\ {\small \url{Balint.Tamasi@uzh.ch}}
  \and Torsten Hothorn \\ {\small \url{Torsten.Hothorn@R-project.org}} }
\date{16/3/2023}
\begin{document}
\maketitle
\abstract{
This vignette serves as the online appendix for the manuscript
\emph{``Flexible Regression for Correlated and Censored Ecological Data with
Mixed-Effects Additive Transformation Models''}. %% \citet{Tamasi_2022}.
It presents details on estimation and inference in the titular model class,
four example analyses that
% use mixed-effects additive transformation models to reanalyze
use these models to reanalyze ecological phenomena from recently published
studies, as well as a small simulation exercise to verify the correctness of
the implementation. }

\section{Estimation and inference}\label{sec:inference}

In this section, we give a brief summary of maximum likelihood estimation and
inference in mixed-effects additive transformation models.
The model can be written in its general form as
%
\begin{align}
\IP\left(Y_{i}\leq y \mid \x_{i}, \z_{i}, \bgamma \right) &=
  F\left(h(y) + f_{1}(x_{1i}) + f_{2}(x_{2i}, x_{3i}) + \dots +
  \x_{+i}\T\bbeta + \z_{i}\T\bgamma\right) \qquad
  \bgamma \sim \cN_{q}(\0, \bSigma), \label{eq:memodel}
\end{align}
%
where $F(\cdot)$ is the inverse link function and $h(y)$ is the baseline
transformation function.
Each smooth term ($f_{j}(\cdot)$) can be represented with the help of basis
functions as $f_{j}(x_{ji}) = \bb_{j}(x_{ji})\T\bdelta_{j}$.
To prevent overfitting, penalties of the general form
$\lambda_{j}\bdelta_{j}\T\bS_{j}\bdelta_{j}$ can be added to the model.
$\lambda_{j}$ is the smoothing parameter that sets the amount of penalty
imposed on the $j$th term and thus controls the smoothness of $f_{j}(\cdot)$,
while the penalty matrix $\bS_{j}$ defines how exactly the parameters
$\bdelta_{j}$ are penalized.
The log-likelihood of the model in Equation~\ref{eq:memodel} can be formulated
with the help of appropriate penalty terms for the smooth effects as well as
the random effects
%
\begin{align}
\ell(\bvartheta, \bbeta, \bSigma, \blambda) &=
  \ell^{\star}(\bvartheta, \bbeta, \bgamma, \bdelta)
  - \frac{1}{2} \sum_{j=1}^{J}\lambda_{j}\bdelta_{j}\T\bS_{j}\bdelta_{j}
  - \frac{1}{2} \bgamma\T \bSigma^{-1}\bgamma + c, \label{eq:loglik}
\end{align}
%
where $\ell^{\star}(\bvartheta, \bbeta, \bgamma, \bdelta) =
\sum_{i=1}^{N}\ell_{i}^{\star}(\bvartheta, \bbeta, \bgamma, \bdelta)$ is the
unpenalized or conditional (on the vector of random effects and penalized
coefficients) log-likelihood, which can be written as the sum of the
log-likelihood contributions of the individual observations, assuming
conditional independence.
$\blambda$ denotes the vector of smoothing parameters corresponding to the
smooth terms $f_{1}(\cdot), f_{2}(\cdot), \dots, f_{J}(\cdot)$, and
$\bdelta = \left(\bdelta_{1}\T, \bdelta_{2}\T, \dots, \bdelta_{J}\T\right)\T$.
The covariance matrix of the stacked vector of random effects ($\bSigma$) is
positive definite and typically has a block structure, which can be leveraged
in the calculations.
The term $c$ is a constant, which does not depend on the parameters (either
fixed or random) of the model.
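To make the structure of Equation~\ref{eq:loglik} concrete, the two penalty
terms translate into a few lines of \textsf{R}.
The following sketch assumes hypothetical inputs that are not part of the
\pkg{tramME} interface: a vector \code{lambda} of smoothing parameters, lists
\code{delta} and \code{S} of the corresponding coefficient blocks and penalty
matrices, a random effects vector \code{gamma} with covariance matrix
\code{Sigma}, and the unpenalized value \code{ll\_star}.
%
<<penalty-sketch, eval = FALSE>>=
## Smooth penalty: 1/2 * sum_j lambda_j * t(delta_j) %*% S_j %*% delta_j
pen_smooth <- 0.5 * sum(mapply(
  function(lj, dj, Sj) lj * c(crossprod(dj, Sj %*% dj)),
  lambda, delta, S))
## Random effects penalty: 1/2 * t(gamma) %*% solve(Sigma) %*% gamma
pen_ranef <- 0.5 * c(crossprod(gamma, solve(Sigma, gamma)))
## Penalized log-likelihood, up to the constant c
ll <- ll_star - pen_smooth - pen_ranef
@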
The conditional log-likelihood contributions of the observations, given that
they are exactly observed, can be written as
%
\begin{align*}
\ell_{i}^{\star}(\bvartheta, \bbeta, \bgamma, \bdelta) &=
  \log F'\left(h(y_{i}) + f_{1}(x_{1i}) + f_{2}(x_{2i}, x_{3i}) + \dots +
  \x_{+i}\T\bbeta + \z_{i}\T\bgamma\right) + \log h'(y_{i})
\end{align*}
%
where $F'(\cdot)$ and $h'(\cdot)$ are the derivative functions of $F(\cdot)$
and $h(\cdot)$, respectively.
Because we directly parameterize the cumulative distribution function of the
outcome in Equation~\ref{eq:memodel}, it is straightforward to express the
log-likelihood contributions of randomly censored observations:
%
\begin{align}
\ell_{i}^{\star}(\bvartheta, \bbeta, \bgamma, \bdelta) &=
  \log\left(\IP\left(Y_{i}\leq \bar{y}_{i} \mid \x_{i}, \z_{i}, \bgamma \right)
  - \IP\left(Y_{i}\leq \ubar{y}_{i} \mid \x_{i}, \z_{i}, \bgamma \right)
  \right), \label{eq:ic}
\end{align}
%
for an interval-censored observation on $\ubar{y}_{i}\leq y_{i} < \bar{y}_{i}$.
The contribution of left-censored ($-\infty < y_{i} < \bar{y}_{i}$) and
right-censored ($\ubar{y}_{i} \leq y_{i} < \infty$) observations can be
expressed by setting the second part of this expression to 0 and the first
part to 1, respectively.

Likelihood-based inference in the family of mixed-effects transformation
models is presented by \citet{Tamasi_Hothorn_2021}.
The estimation procedure described there is directly applicable in the
setting with penalized smooth additive terms, utilizing their mixed model
representation \citep{Ruppert_2003, Wand_smoothing_2003}.
In the case of some smoothing bases, a reparameterization is necessary to fit
into a mixed model-based estimation framework (\citealp[Appendix]{Wood_2004}
and \citealp[Chapter~8]{Fahrmeir_2013}).
The marginal log-likelihood function of the model can be efficiently
evaluated by integrating over the vector of random effects using the Laplace
approximation (see Section~\ref{sec:compute} for details).
In the mixed-effects representation, the smoothing parameters can be
estimated as variance components of the mixed model via maximum likelihood or
restricted maximum likelihood (REML).

Frequentist inference for the penalized smooth terms can be based on the
joint covariance matrix estimate of fixed and random parameters using the
generalized delta method \citep{Kristensenetal_2016}.
This estimate approximates the mean squared error of the random effects
predictions \citep{Zheng_Cadigan_2021}, and it is also in agreement with
results derived in a Bayesian setting for independent hierarchical models
\citep{Kass_Steffey_1989}.

The effective degrees of freedom (EDF) of the individual smooth terms give an
approximation of the effective number of parameters used in modeling these
penalized smooth functions.
Following \citet{Wood_2016} and \citet[][Section~6.11]{Wood_gam_2017}, the
EDFs can be estimated by summing the appropriate elements of the diagonal of
the matrix $V\mathcal{I}$, where $V$ is the joint variance-covariance matrix
of all unpenalized (i.e., fixed-effects) and penalized (random) parameters.
The matrix $\mathcal{I}$ is the negative Hessian of the \emph{unpenalized}
log-likelihood ($\ell^{\star}(\bvartheta, \bbeta, \bgamma, \bdelta)$ in
Equation~\ref{eq:loglik}), evaluated at the vector of parameter estimates.
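This EDF calculation translates directly into matrix algebra.
In the following sketch, \code{V} stands for the joint covariance matrix of
all unpenalized and penalized parameters, \code{I} for the negative Hessian
of the unpenalized log-likelihood at the estimates, and \code{idx} for the
positions of the coefficients of a given smooth term; all three are
hypothetical placeholders rather than objects exposed by the \pkg{tramME}
interface.
%
<<edf-sketch, eval = FALSE>>=
## EDF of a single smooth term: sum of the corresponding diagonal
## elements of V %*% I
edf_j <- sum(diag(V %*% I)[idx])
@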
\section{Computational details}\label{sec:compute}

\pkg{tramME} offers several methods to approximate the baseline
transformation function ($h(y)$ in Equation~\ref{eq:memodel}) of additive
mixed-effects transformation models \citep[see][for the available
options]{Tamasi_Hothorn_2021}.
Similarly to \citet{Hothornetal_2018}, the \emph{Bernstein polynomial basis}
\citep{Farouki_2012} is used as a general and flexible parameterization for
continuous outcomes:
%
\begin{align*}
h(y) &= \ba_{\text{Bs},M}(y)\T\bvartheta = \frac{1}{M+1}\sum_{m=0}^{M}\vartheta_{m}
  \binom{M}{m}\tilde y^{m}(1-\tilde y)^{M-m},
\end{align*}
%
where $\ba_{\text{Bs},M}(y)$ denotes the set of order $M$ polynomials in
Bernstein form, and $\tilde y$ equals $y$ scaled to the interval $[0, 1]$.
In practice, the exact order of the polynomials in Bernstein form usually
does not have a large effect on the results beyond a minimal value.
\citet{Hothorn_mlt_2020} discusses the choice of order in fixed effects-only
transformation models and recommends choosing $M$ between 5 and 10 (a short
numerical sketch of this basis is given below).
In the application of the main article, we used an order of 6 as the default.

Likelihood-based estimation and inference in mixed-effects additive
transformation models is implemented by the \textsf{R} add-on package
\pkg{tramME} \citep{Tamasi_Hothorn_2021}.
The package utilizes existing implementations of linear transformation models
\citep[\pkg{mlt} and \pkg{tram} packages by][]{Hothorn_mlt_2020} for the user
interface and to set up inputs for the estimation procedure.
The penalized smooth terms are defined with the help of the \pkg{mgcv}
package \citep{Wood_gam_2017}, which provides functionality to reformulate
these terms to their mixed-effects representations as described by
\citet[Appendix]{Wood_2004}.
\pkg{tramME} uses the Template Model Builder package
\citep[\pkg{TMB},][]{Kristensenetal_2016} to evaluate the log-likelihood via
Laplace approximation and to calculate its gradients with automatic
differentiation \citep[\pkg{glmmTMB}, by][uses the same underlying
techniques]{Brooks_2017}.
Consequently, model estimation and inference in \pkg{tramME} are fast and
efficient.
The user interface of \pkg{tramME} follows standard model specification
conventions of the most popular \textsf{R} packages (\pkg{lme4},
\citealp{Bates_lme4_2015}, \pkg{mgcv}, \citealp{Wood_gam_2017}) to define
mixed-effects additive transformation models.
Thus, the transition from a \texttt{gam}, \texttt{lmer}, or \texttt{glmer}
model is very simple.
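As a quick illustration of the Bernstein parameterization of $h(y)$ discussed
above, the basis can be evaluated in a few lines of base \textsf{R}.
This is only a sketch with a hypothetical coefficient vector \code{theta} of
length $M + 1$; in \pkg{tramME}, the basis setup (including monotonicity
constraints on $h$) is handled internally by the \pkg{mlt} machinery.
%
<<bernstein-sketch, eval = FALSE>>=
## Bernstein basis of order M, evaluated on y scaled to [0, 1]
M <- 6
ytilde <- seq(0, 1, length.out = 101)
B <- sapply(0:M, function(m) choose(M, m) * ytilde^m * (1 - ytilde)^(M - m))
## h(y) as in the display above, given a coefficient vector theta
h <- c(B %*% theta) / (M + 1)
@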
As an example of this interface, a mixed-effects additive parametric
proportional hazards model can be defined with the function call
%
<>=
## tramME is available from CRAN:
## install.packages("tramME")
library("tramME")
mc1 <- CoxphME(                ### conditional proportional hazards
  Time ~                       ### response time intervals
    Insects +                  ### fixed effects
    Habitat + Landscape +
    s(Temperature, k = 20) +   ### non-linear terms, as in mgcv
    s(Elevation100, k = 20) +
    (1 | PlotID),              ### random intercept, as in lme4
  data = carrion,              ### data
  log_first = TRUE,            ### log(Time) before modeling
  order = 6                    ### order of the Bernstein polynomial
)
@
%

\section{The rat carrion decomposition experiment}

<>=
carrion <- read.csv("carrion.csv")
carrion$Time <- with(carrion, Surv(time1, time2, type = "interval2"))
carrion$Time_rc <- with(carrion, Surv(ifelse(is.finite(time2), time2, time1),
                                      event = is.finite(time2)))
carrion$Insects <- factor(carrion$Insects, labels = c("no", "yes"))
carrion$Habitat <- factor(carrion$Habitat)
carrion$Habitat <- relevel(carrion$Habitat, ref = "forest")
carrion$Landscape <- factor(carrion$Landscape)
carrion$Landscape <- relevel(carrion$Landscape, ref = "seminatural")
carrion$PlotID <- factor(carrion$PlotID)
@

\begin{figure}[!ht]
\centering
<>=
par(mfrow = c(1, 2))
## -- Plot 1
par(cex = 0.9, mar = c(5, 4, 2, 1))
idx <- 140:155
dpl <- carrion$Time[idx]
plot(0, type = "n", ylim = range(idx) + c(-0.5, 0.5), xlim = c(0, 120),
     ylab = "ID", xlab = "Follow-up time (days)", yaxt = "n")
abline(h = idx, v = seq(0, 120, by = 20), col = "lightgrey", lty = 3)
axis(2, at = idx, las = 2)
ic <- dpl[, 3] > 0
segments(y0 = idx[ic], x0 = dpl[ic, 1], x1 = dpl[ic, 2], lwd = 2)
arrows(y0 = idx[!ic], x0 = dpl[!ic, 1], x1 = 123, length = 0.1, lwd = 2)
## -- Plot 2
par(cex = 0.9, mar = c(5, 4, 2, 1))
dpl <- carrion$Time[carrion$Time[, 3] > 0] ## exclude right censored
plot(ecdf(dpl[, 2] - dpl[, 1]), xlab = "Interval length (days)", main = NULL,
     verticals = TRUE, las = 1, ylab = "Probability")
grid()
@
\caption{
Censoring in the carrion decomposition dataset of
\protect\citet{Englmeier2022}.
\emph{Left panel}: Examples of censoring types. Segments with arrows denote
right-censored observations; other segments show interval-censored outcomes.
\emph{Right panel}: The distribution of the interval lengths of
interval-censored observations.
}\label{fig:carrion-cens}
\end{figure}

\begin{figure}[!ht]
\centering
<>=
sv <- survfit(Time ~ Insects, data = carrion)
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
plot(sv, lty = c(1, 2), lwd = 2, xlab = "", ylab = "P(T>t)")
title(xlab = "Follow-up time (days)", line = 2.3)
grid()
legend("topright", c("Insect = no", "Insect = yes"), lty = c(1, 2), lwd = 2,
       bty = "n")
@
\caption{Non-parametric survival probability estimates associated with the
decomposition times in groups where insect access was and was not allowed in
the carrion decomposition experiment.}
\label{fig:carrion-surv}
\end{figure}

Our first example presents the reanalysis of the carrion decomposition data
by \citet{Englmeier2022}.
In the original study, the authors analyzed the environmental factors that
affect the decomposition process of small rodent carrion using data from an
experiment, in which they placed rat carcasses in different environments and
recorded the time until complete decomposition.
As we describe in the main article, the outcome variable (time until complete
decomposition) is \emph{interval censored} due to the discrete follow-up.
Figure~\ref{fig:carrion-cens} presents some examples of the censored outcomes
in the dataset.
Segments with arrows denote right-censored observations, i.e., cases where
complete decomposition did not occur by the time of the last visit.
As these examples demonstrate, the sites were visited at regular intervals,
although the interval lengths show some variability.
The survival curves for specimens with and without the presence of insects
are shown in Figure~\ref{fig:carrion-surv}.
Based on this marginal view of the data, i.e., without adjusting for other
important factors and ignoring cluster-level heterogeneity, the presence of
insects accelerated the decomposition process.
The 25\textsuperscript{th}, 50\textsuperscript{th} and 75\textsuperscript{th}
percentiles in the cases where insects were present were
\Sexpr{paste(round(quantile(sv, c(0.25, 0.5, 0.75))$quantile[2, ], 3), collapse = ", ")} %$
days, respectively, while, without insects, the same quantiles amounted to
\Sexpr{paste(round(quantile(sv, c(0.25, 0.5, 0.75))$quantile[1, ], 3), collapse = ", ")} %$
days.

The main environmental variables we include in our model are the indicator
for the presence of insects (\texttt{Insects}), local (\texttt{Habitat}) and
regional (\texttt{Landscape}) land use types, the average temperature on each
experimental plot measured with thermologgers (\texttt{Temperature}), and the
elevational gradient, which serves as a surrogate for the long-term
macroclimate (\texttt{Elevation100}).
The plot-level unobserved sources of variability were modeled by including
random intercepts (grouping variable: \texttt{PlotID}).
The outcome variable (\texttt{Time}) is a predefined \code{Surv} object that
encodes the interval-censored decomposition times.
%
<>=
head(carrion$Time, 10)
@

We estimate the following proportional-hazards mixed-effects additive
transformation model for the decomposition intervals
%
<>=
dcmp <- CoxphME(Time ~ Insects + Habitat + Landscape + s(Temperature, k = 20) +
                  s(Elevation100, k = 20) + (1 | PlotID),
                data = carrion, log_first = TRUE, order = 6)
summary(dcmp)
@
%
The smooth terms can be evaluated and plotted with
%
<>=
plot(smooth_terms(dcmp), panel.first = grid())
@
%
As the results in Figure~\ref{fig:carrion-smooth} show, the effects of
temperature and elevation look fairly linear.

\begin{figure}[!ht]
\centering
<>=
par(mar = c(4, 4, 1, 1))
<>
@
\caption{Smooth effects of the continuous variables in the model of the
decomposition times.}\label{fig:carrion-smooth}
\end{figure}

As a general check of the appropriateness of the estimated model, we can
evaluate the \emph{marginal} distribution function or, as it is more commonly
done in survival analysis, the marginal survivor function of the outcomes at
the observations in our dataset by integrating over the random effects
numerically.
%
\begin{align*}
\widehat S(t_{i}\mid\x_{i}) &= \widehat \IP(T_{i} > t_{i}\mid\x_{i})
  = \int_{-\infty}^{+\infty}\widehat\IP(T_{i} > t_{i}\mid\x_{i},\gamma)
  \phi(\gamma)\d\gamma.
\end{align*}
%
$\widehat S(t_{i}\mid\x_{i})$ denotes the fitted survivor function, which is
straightforward to calculate from the mixed-effects additive transformation
model, due to its fully parametric approximation of the outcome distribution,
and $\phi(\gamma)$ denotes the density of the estimated random intercept
distribution.
By evaluating $-\log(\widehat S(t_{i}\mid\x_{i}))$ at the observations in our
dataset, we get the Cox-Snell residuals
\citep[Chapter~11]{Klein_Moeschberger_2003}, which are unit exponentially
distributed under the correct model.
Interval-censored outcomes pose a technical difficulty in assessing the
Cox-Snell residuals.
In this situation, we can either evaluate the marginal survivor function at
the upper and lower bounds of the censoring intervals and assess the
interval-censored version of the residuals, or we can apply the adjustment
proposed by \citet{Farrington_2000}, which replaces these intervals with
expected values under the unit exponential distribution.
To assess the distribution of the residuals, we estimate the cumulative
hazard function of the Cox-Snell residuals, which should be close to a
straight line with unit slope through the origin under the correct model.
Figure~\ref{fig:resid} presents the distributions of the Cox-Snell residuals
using the two approaches to handling interval censoring.
Neither of these plots signals serious departures from the unit exponential
distribution, which supports the appropriateness of our regression model.
Because the marginal Cox-Snell residuals are not independent in our case,
these plots only provide a crude visual check of the model fit.

<>=
## From Surv object to a matrix with left and right censoring values
get_bounds <- function(x) {
  stopifnot(is.Surv(x))
  lb <- x[, 1]
  ub <- x[, 2]
  ty <- x[, 3]
  ub[ty == 2] <- lb[ty == 2]
  lb[ty == 2] <- -Inf
  ub[ty == 0] <- Inf
  ub[ty == 1] <- lb[ty == 1]
  cbind(lb, ub)
}
## Evaluate the marginal survivor function
marginalize <- function(model, data, type = "survivor",
                        lower = -Inf, upper = Inf) {
  ## fun(y|x,g) * density(g)
  joint <- function(re, nd, mod, type) {
    nd <- nd[rep(1, length(re)), ]
    nd[[variable.names(mod, "grouping")]] <- seq(nrow(nd)) ## to take vector-valued REs
    pr <- predict(mod, newdata = nd, ranef = re, type = type) *
      dnorm(re, 0, sd = sqrt(varcov(mod)[[1]][1, 1]))
    c(pr)
  }
  ## integrate out the random effects row by row
  res <- parallel::mclapply(split(data, seq(nrow(data))), function(nd) {
    out <- integrate(joint, lower = lower, upper = upper, nd = nd,
                     mod = model, type = type)
    if (out$message == "OK") return(out$value)
    return(NA)
  }, mc.cores = 8)
  unlist(res)
}
## Cox-Snell residuals
## type:
## ic:  Evaluate the cumulative hazard at the left and right censoring values
##      and return residuals as interval-censored.
## adj: Adjusted CS residuals (Farrington, 2000):
##      replace the intervals with the expected values under the unit
##      exponential distribution on these intervals (assuming a correct model fit)
CSresid <- function(model, data = model.frame(model),
                    type = c("both", "adj", "ic")) {
  type <- match.arg(type)
  rv <- variable.names(model, "response")
  if (!is.Surv(data[[rv]])) data[[rv]] <- Surv(data[[rv]])
  bns <- get_bounds(data[[rv]])
  stopifnot(all(is.finite(bns[,1])))
  data[[rv]] <- bns[, 1]
  Sl <- marginalize(model, data, type = "survivor")
  Su <- numeric(length = nrow(bns))
  ie <- bns[,1] == bns[,2]
  Su[ie] <- Sl[ie]
  Su[ir <- is.infinite(bns[,2])] <- 0
  ii <- !(ie | ir)
  data[[rv]] <- bns[, 2]
  Su[ii] <- marginalize(model, data[ii, ], type = "survivor")
  res_ic <- Surv(-log(Sl), -log(Su), type = "interval2")
  if (type == "ic") return(res_ic)
  res <- numeric(length = nrow(bns))
  res[ie] <- -log(Sl[ie])
  res[ir] <- 1 - log(Sl[ir])
  res[ii] <- (Sl[ii] * (1 - log(Sl[ii])) - Su[ii] * (1 - log(Su[ii]))) /
    (Sl[ii] - Su[ii])
  if (type == "adj") return(res)
  data.frame(IC = res_ic, adjusted = res)
}

if (!file.exists("CS-resid.rda")) {
  resids <- CSresid(dcmp, type = "both")
  save(resids, file = "CS-resid.rda")
} else {
  load("CS-resid.rda")
}
@

\begin{figure}[!ht]
\centering
<>=
rf_cens <- survfit(IC ~ 1, data = resids) ## Turnbull NPMLE for interval-censored
rf_adj <- survfit(Surv(adjusted) ~ 1, data = resids) ## KM for adjusted
par(mfrow = c(1, 2), cex = 0.9, mar = c(4, 4, 2, 1))
plot(rf_cens$time, -log(rf_cens$surv), type = "s", lwd = 1, las = 1,
     xlab = "Cox-Snell residuals", ylab = "Cumulative hazard")
abline(0, 1, lwd = 2, lty = 2)
grid()
blx <- grconvertX(0.10, from = "nfc", to = "user")
bly <- grconvertY(0.98, from = "nfc", to = "user")
text(blx, bly, labels = "A", xpd = TRUE, cex = 1.2)
plot(rf_adj$time, rf_adj$cumhaz, type = "s", lwd = 1, las = 1,
     xlab = "Cox-Snell residuals", ylab = "Cumulative hazard")
abline(0, 1, lwd = 2, lty = 2)
grid()
blx <- grconvertX(0.10, from = "nfc", to = "user")
bly <- grconvertY(0.98, from = "nfc", to = "user")
text(blx, bly, labels = "B", xpd = TRUE, cex = 1.2)
@
\caption{Cox-Snell residuals of the carrion decomposition model.
\emph{Panel A}: Treating the residuals as interval-censored and estimating
the cumulative hazard function using the Turnbull non-parametric maximum
likelihood estimator.
\emph{Panel B}: Using the adjustment proposed by
\protect\citet{Farrington_2000}.
The dashed lines correspond to the unit exponential distribution.
}\label{fig:resid}
\end{figure}

We can relax the assumption of proportional hazards by allowing for
time-dependent covariate effects.
A transformation model with time-dependent effects for the \texttt{Insects}
indicator can be estimated as
%
<>=
dcmp2 <- CoxphME(Time | Insects ~ Habitat + Landscape + s(Temperature, k = 20) +
                   s(Elevation100, k = 20) + (1 | PlotID),
                 data = carrion, log_first = TRUE, order = 6)
@
%
In Figure~\ref{fig:non-ph}, we compare the effect estimates of the presence
of insects (on the log-hazard scale) from the proportional hazards and
non-proportional hazards (time-varying effects) models.
According to these results, the proportional hazards assumption seems
plausible.
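Because the proportional hazards model is nested in the time-varying effects
model, the two fits could also be contrasted with a likelihood ratio test.
A minimal sketch, assuming the \code{anova} method for \pkg{tramME} models
applies to this pair of fits:
%
<<ph-lrt-sketch, eval = FALSE>>=
## Likelihood ratio test of the proportional hazards assumption:
## dcmp (proportional effect) is nested in dcmp2 (time-varying effect)
anova(dcmp, dcmp2)
@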
\begin{figure}[!ht]
\centering
<>=
cf <- coef(dcmp2, with_baseline = TRUE)
vc <- vcov(dcmp2, pargroup = "baseline")
cf <- cf[grep("Insectsyes", names(cf), fixed = TRUE)]
idx <- grep("Insectsyes", colnames(vc), fixed = TRUE)
vc <- vc[idx, idx]
ns <- 200
nd <- model.frame(dcmp2)[rep(1, ns), ]
nd[[variable.names(dcmp2, "response")]] <- seq(1, 100, length.out = ns)
X <- model.matrix(dcmp2, data = nd, type = "Y", simplify = TRUE)$Ye
idx <- grep("Insectsyes", colnames(X), fixed = TRUE)
X <- X[, idx]
ci <- confint(multcomp::glht(multcomp::parm(cf, vc), linfct = X),
              calpha = multcomp::univariate_calpha(), level = 0.95)$confint
## use multcomp::adjusted_calpha() for multiplicity adjustments
par(cex = 0.8, mar = c(4, 4, 2, 1), las = 1)
plot(nd$Time, ci[, 1], type = "l", col = 1, lwd = 2, ylim = c(-1, 3),
     panel.first = {grid(); abline(h = 0, lwd = 2, col = "lightgrey")},
     ylab = "Log-hazard ratio", xlab = "")
title(xlab = "Time (days)", line = 2.3)
polygon(c(nd$Time, rev(nd$Time)), c(ci[, 2], rev(ci[, 3])),
        border = NA, col = grey(0.5, 0.2))
ci2 <- confint(dcmp, parm = "Insectsyes", estimate = TRUE)
ci2 <- ci2[rep(1, ns), ]
matlines(nd$Time, ci2, col = 1, lwd = c(1, 1, 2), lty = c(3, 3, 2))
legend("bottomright", c("Time-varying effect", "Proportional effect"),
       lty = 1:2, lwd = 2, bty = "n", cex = 0.9)
@
\caption{The effect of the presence of insects on the decomposition process
from the proportional hazards and non-proportional hazards
models.}\label{fig:non-ph}
\end{figure}

\section{\emph{E. coli} concentrations in streams with different grazing
periods}\label{sec:ecoli}

\citet{Hulvey_2021} compare the concentration levels of \emph{Escherichia
coli} bacteria (most probable number, MPN) in streams under three different
rotational grazing regimes.
In the additive mixed model specifications they estimated, within-year
variability was modeled as a function of the day of year (DOY) with cubic
regression splines, while between-year and location-level variability were
captured by random intercepts for pasture-specific year effects and separate
stream effects.
Note that although the cyclic version of the cubic regression splines
(\texttt{bs = "cc"} in \pkg{mgcv} and \pkg{tramME}) \citep{pkg:tramME} would
be more appropriate for modeling the within-year trend, the original article
used \texttt{bs = "cr"} and hence we also stick with this basis in our
reanalysis.

<>=
ecoli <- read.csv("Hulvey2021.csv")
fs <- c("treatment", "stream", "pasture", "cattle", "rotation")
ecoli[fs] <- lapply(ecoli[fs], factor)
@

As a first step, we replicate the results of all model variants that
\citet{Hulvey_2021} investigated in the original article with the \textsf{R}
package \pkg{gamm4} \citep{pkg:gamm4}.
In a second step, we fit the \emph{same} models using the software
implementation of additive mixed transformation models in package
\pkg{tramME}, that is, using a linear transformation function (function
\code{tramME::LmME}).
We expect identical results in steps one and two, even though the two
implementations rely on completely distinct code bases.
Thus, these results are only interesting from a quality assurance point of
view.
In the last step, we relax the normal distributional assumption by allowing a
nonlinear transformation function (\code{tramME::BoxCoxME} function) and
evaluate how the model fits change.
We are primarily interested in potential changes in model interpretation
induced by a shift from a normal to a distribution-free model.
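Before running all specifications in a loop, the three steps can be
illustrated on a single specification.
A sketch using the simplest (null) specification, mirroring the loop below:
%
<<ecoli-steps-sketch, eval = FALSE>>=
## Step 1: GAMM with gamm4
m_gamm <- gamm4(log10(ecoli_MPN) ~ s(DOY, bs = "cr"), data = ecoli,
                random = ~ (1 | year:stream:pasture) + (1 | stream),
                REML = FALSE)
## Step 2: the same normal model as an additive transformation model
m_lm <- LmME(log10(ecoli_MPN) ~ s(DOY, bs = "cr") +
               (1 | year:stream:pasture) + (1 | stream), data = ecoli)
## Step 3: relaxing normality with a flexible transformation function
m_bc <- BoxCoxME(log10(ecoli_MPN) ~ s(DOY, bs = "cr") +
                   (1 | year:stream:pasture) + (1 | stream), data = ecoli)
c(gamm4 = logLik(m_gamm$mer), LmME = logLik(m_lm), BoxCoxME = logLik(m_bc))
@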
As Table~\ref{tbl:ecoli-res} shows, we managed to reproduce the \pkg{gamm4}
results with \pkg{tramME}.
Moreover, relaxing the distributional assumption of the normal linear model
resulted in better model fits in terms of in-sample log-likelihood values.

<>=
## specifications w/o random effects
mf <- c(log10(ecoli_MPN) ~ treatment + cattle + s(DOY, bs = "cr", by = treatment),
        log10(ecoli_MPN) ~ treatment + cattle + s(DOY, bs = "cr"),
        log10(ecoli_MPN) ~ treatment + s(DOY, bs = "cr", by = treatment),
        log10(ecoli_MPN) ~ cattle + s(DOY, bs = "cr"),
        log10(ecoli_MPN) ~ treatment + s(DOY, bs = "cr"),
        log10(ecoli_MPN) ~ s(DOY, bs = "cr"))
names(mf) <- paste("Model", c(1:5, "Null"))
ecoli_res <- data.frame(matrix(NA, nrow = length(mf), ncol = 3))
colnames(ecoli_res) <- c("gamm", "LmME", "BoxCoxME")
rownames(ecoli_res) <- names(mf)
for (i in seq_along(mf)) {
  m_gamm <- gamm4(mf[[i]], data = ecoli,
                  random = ~ (1 | year:stream:pasture) + (1 | stream),
                  REML = FALSE)
  ecoli_res$gamm[i] <- logLik(m_gamm$mer)
  mf2 <- update(mf[[i]], . ~ . + (1 | year:stream:pasture) + (1 | stream))
  m_LmME <- LmME(mf2, data = ecoli)
  if (m_LmME$opt$convergence == 0)
    ecoli_res$LmME[i] <- logLik(m_LmME)
  m_BCME <- BoxCoxME(mf2, data = ecoli)
  if (m_BCME$opt$convergence == 0)
    ecoli_res$BoxCoxME[i] <- logLik(m_BCME)
}
@

\begin{table}[!ht]
\centering
\caption{Log-likelihood values of the fitted models presented by
\protect\citet[\emph{GAMM}]{Hulvey_2021}, replicated as mixed-effects
additive transformation models assuming conditional normality
(\emph{Additive normal transformation model}) and extended as flexible
(non-normal) mixed-effects additive transformation models (\emph{Additive
non-normal transformation model}).}
\label{tbl:ecoli-res}
<>=
names(ecoli_res) <- c("\\multicolumn{1}{m{2cm}}{\\centering GAMM}",
  "\\multicolumn{1}{m{4cm}}{\\centering Additive normal transformation model}",
  "\\multicolumn{1}{m{4cm}}{\\centering Additive non-normal transformation model}")
print(xtable(ecoli_res, align = c("l", "R{2cm}", rep("R{4cm}", 2))),
      floating = FALSE, booktabs = TRUE,
      sanitize.colnames.function = function(x){x})
@
\end{table}

Let us focus on the most complicated specification, Model~1,
%
<>=
update(mf[[1]], . ~ . + (1 | year:stream:pasture) + (1 | stream))
@
%
\noindent and compare the effect estimates from the normal model to its
non-normal counterpart.
But first, notice that by changing the transformation from
$h(y) = \vartheta_{0} + \vartheta_{1}y$ to $h(y) = \ba(y)\T\bvartheta$, we
change the scale on which the coefficients and the smooth terms are
interpreted.
In the normal additive mixed model, the coefficient of a fixed effect
captures the change in the expectation of the outcome when increasing the
respective predictor by one unit (keeping everything else unchanged).
In the non-normal transformation model with $\Phi$ as the inverse link, the
coefficients capture similar effects but on a latent scale defined by the
transformation $h(Y)$.
To cast the effect estimates from the two models to a common scale, we can
calculate the \emph{probabilistic indices} \citep[PI,][]{Thas_2012}.
To simplify the notation, we first focus on the simple, fixed effects-only
case with a single predictor:
%
\begin{align*}
\IP(Y\leq y \mid \X = x) &= \Phi\left(h(y) - \beta x \right)
\end{align*}
%
The PI is the probability that one outcome ($Y^{\star}$) is larger than the
other ($Y$), given the same covariate values ($\X$) except for one, which is
larger by one unit ($\X^{\star}$).
In our simplified example, this means
%
\begin{align*}
\IP\left(Y < Y^{\star} \mid \X = x, \X^{\star} = x+1\right) &=
  \IP\left(h(Y) < h(Y^{\star}) \mid \X = x, \X^{\star} = x+1\right) \\
&= \IP\left(\frac{h(Y) - h(Y^{\star}) + \beta}{\sqrt{2}} <
  \frac{\beta}{\sqrt{2}} \right) \\
&= \Phi\left(\frac{\beta}{\sqrt{2}}\right).
\end{align*}
%
The third line follows from the fact that, in a transformation model with
$\Phi(\cdot)$ as the inverse link, $h(Y)$ and $h(Y^{\star})$ are independent,
normally distributed random variables with unit variance and a mean
difference of $\beta$.
Notice that the PI does not depend on the transformation function.
When random effects are present in the model, the PI is conditional on the
cluster.
In the case of transformation models with non-linear additive terms, the
probabilistic index is a function of the covariate.
In the simplest form of an additive transformation model with probit link,
we have
%
\begin{align*}
\IP(Y\leq y \mid \X = x) &= \Phi\left(h(y) - f(x) \right)
\end{align*}
%
and the PI is
%
\begin{align*}
PI(x) = \IP\left(Y < Y^{\star} \mid \X = x, \X^{\star} = x+1\right)
  = \Phi\left(\frac{f(x+1) - f(x)}{\sqrt{2}} \right).
\end{align*}

By transforming the effect estimates to the probability scale,
Figure~\ref{fig:ecoli-nonnorm} compares the smooth terms from the normal and
non-normal versions of Model~1, while the first two blocks of
Table~\ref{tbl:ecoli-fes} contrast the fixed effects estimates.
The results are very close to each other, which suggests that the original
log-normal model is actually appropriate.
As a built-in visual normality check, we can compare the fitted
transformation functions of the normal and non-normal transformation models.
In Figure~\ref{fig:ecoli-trafo}, the linear function corresponds to the
conditional normal distribution.
This result further confirms the appropriateness of the normal additive model
in this specific example.

<>=
plot2cis <- function(x, y, col = c(1, 2),
                     fill = c(grey(0.1, 0.25), rgb(1, 0, 0, 0.25)),
                     xlabs = NULL, ylabs = NULL, mains = NULL, ...) {
  stopifnot(length(x) == length(y))
  for (ii in seq_along(x)) {
    plot(0, type = "n",
         xlab = if (is.null(xlabs)) colnames(x[[ii]])[1] else xlabs[ii],
         ylab = if (is.null(ylabs)) colnames(x[[ii]])[2] else ylabs[ii],
         main = if (is.null(mains)) NULL else mains[ii],
         xlim = range(x[[ii]][, 1], y[[ii]][, 1]),
         ylim = range(x[[ii]][, 2:4], y[[ii]][, 2:4]),
         panel.first = grid(), ...)
lines(x[[ii]][, 1], x[[ii]][, 2], col = col[1]) lines(y[[ii]][, 1], y[[ii]][, 2], col = col[2]) polygon(c(x[[ii]][, 1], rev(x[[ii]][, 1])), c(x[[ii]][, 3], rev(x[[ii]][, 4])), border = NA, col = fill[1]) polygon(c(y[[ii]][, 1], rev(y[[ii]][, 1])), c(y[[ii]][, 3], rev(y[[ii]][, 4])), border = NA, col = fill[2]) } } ## NOTE: currently only w/ pnorm inverse link smooth2ci <- function(sm, PI = FALSE, ilink = "pnorm") { PIfun <- switch(ilink, pnorm = function(x) pnorm(x / sqrt(2)), stop("No other inverse links are available atm.")) lapply(sm, function(x) { out <- matrix(0, nrow = nrow(x), ncol = 4) colnames(out) <- c(colnames(x)[c(1, ncol(x)-1)], "lwr", "upr") out[, 1] <- x[, 1] se <- rev(x)[, 1] yy <- rev(x)[, 2] out[, 2] <- yy out[, 3:4] <- yy + qnorm(0.975) * se %o% c(-1, 1) if (PI) out[, 2:4] <- PIfun(out[, 2:4]) out }) } ## NOTE: this function assumes the smooth term: s(DOY, by = "treatment") diff_smooth <- function(mod, DOY = NULL, n = 100) { XZ_sm <- function(XZ) { X <- XZ$X X[, attr(XZ$X, "type") != "sm"] <- 0 Z_ <- Matrix::t(XZ$Zt)[, attr(XZ$Zt, "type") == "sm", drop = FALSE] Z <- tramME:::nullTMatrix(nrow = nrow(Z_), ncol = length(mod$param$gamma)) Z[, attr(mod$param$gamma, "type") == "sm"] <- Z_ list(X = X, Z = Z) } mf <- model.frame(mod, drop.unused.levels = TRUE) if (is.null(DOY)) DOY <- seq(range(mf$DOY)[1], range(mf$DOY)[2], length.out = n) nd_ <- expand.grid(DOY = DOY, treatment = factor(levels(mf$treatment), levels = levels(mf$treatment))) nd <- mf[rep(1, nrow(nd_)), ] nd[colnames(nd_)] <- nd_ XZ <- model.matrix(mod, data = nd, type = c("X", "Zt"), keep_sign = FALSE) XZ1 <- XZ_sm(XZ) nd$DOY <- nd$DOY + 1 XZ <- model.matrix(mod, data = nd, type = c("X", "Zt"), keep_sign = FALSE) XZ2 <- XZ_sm(XZ) XZ <- list(X = XZ2$X - XZ1$X, Z = tramME:::as_dgTMatrix(XZ2$Z - XZ1$Z)) pr <- predict(mod$tmb_obj, newdata = XZ, scale = "lp") split(data.frame(DOY = nd_$DOY, df = pr$pred, se = pr$se), nd$treatment) } @ <>= fm1 <- log10(ecoli_MPN) ~ treatment + cattle + s(DOY, bs = "cr", by = treatment) + (1 | year:stream:pasture) + (1 | stream) ecoli_m1 <- LmME(fm1, data = ecoli) ecoli_m1_bc <- BoxCoxME(fm1, data = ecoli) @ \begin{figure}[!ht] \centering <>= par(mfrow = c(1, 3), cex = 0.8, mar = c(4, 4, 2, 1), las = 1) plot2cis(smooth2ci(diff_smooth(ecoli_m1), PI = TRUE), smooth2ci(diff_smooth(ecoli_m1_bc), PI = TRUE), mains = paste("treatment =", levels(ecoli$treatment)), ylabs = rep("PI", 3)) legend("topright", c("normal", "non-normal"), col = c(1, 2), lty = 1, bty = "n", cex = 0.9) @ \caption{The comparison of the smooth terms from the normal and non-normal (probit link) mixed-effects additive transformation models (specification Model~1). 
}\label{fig:ecoli-nonnorm}
\end{figure}

\begin{figure}[!ht]
\centering
<>=
nd <- model.frame(ecoli_m1_bc)[rep(1, 100), ]
nd[[1]] <- do.call(seq, c(as.list(range(log10(ecoli$ecoli_MPN))),
                          length.out = 100))
Y <- model.matrix(ecoli_m1_bc, data = nd, type = "Y")$Ye
b <- coef(ecoli_m1_bc, with_baseline = TRUE)[1:7]
vc <- vcov(ecoli_m1_bc, pargroup = "baseline")
ci <- confint(multcomp::glht(multcomp::parm(b, vc), linfct = Y),
              calpha = multcomp::univariate_calpha())$confint
par(mar = c(4, 4, 1, 1), cex = 0.8, las = 1)
plot(0, type = "n", xlim = range(nd[[1]]), ylim = range(ci),
     xlab = variable.names(ecoli_m1_bc, "response"), ylab = "h(y)",
     panel.first = grid(), xaxs = "i", yaxs = "i")
lines(nd[[1]], ci[, 1], lwd = 2)
polygon(c(nd[[1]], rev(nd[[1]])), c(ci[, 2], rev(ci[, 3])),
        col = grey(0.2, 0.2), border = FALSE)
b2 <- coef(ecoli_m1, with_baseline = TRUE)
abline(b2[1:2], lwd = 2, lty = 2)
legend("topleft", c("Non-normal", "Normal"), lwd = 2, lty = 1:2, bty = "n")
@
\caption{Baseline transformation functions from the normal and non-normal
mixed-effects additive transformation models.}\label{fig:ecoli-trafo}
\end{figure}

The outcome variable (MPN per 100~ml) was measured with the Quanti-Tray
System, which can detect \emph{E. coli} concentrations up to a maximum of
2,419.6~MPN without dilution.
This means that there is an effective upper detection limit on the outcome,
i.e., the \Sexpr{sum(ecoli$ecoli_MPN == 2419.6)} observations with a value of
2,419.6 are \emph{right censored}.
The authors of the original article mention this fact, but they do not take
it into account in the subsequent analyses.
Because random censoring can be easily handled in \pkg{tramME}, we will rerun
the model taking the upper limit into account.
%
<>=
fm1c <- update(fm1, Surv(log10(ecoli_MPN), event = ecoli_MPN < 2419.6) ~ .)
ecoli_m1_cens <- BoxCoxME(fm1c, data = ecoli)
summary(ecoli_m1_cens)
@
%
The fitted non-linear terms are compared to the original (normal linear)
estimates in Figure~\ref{fig:ecoli-cens} and the fixed effects are presented
in the third block of Table~\ref{tbl:ecoli-fes}.

\begin{table}[!ht]
\centering
\caption{
Estimates of the parametric fixed-effects terms on the \emph{probability
scale} (PI:~probabilistic index) from the normal, non-normal and non-normal
(with censoring taken into account) models, respectively.
}\label{tbl:ecoli-fes}
<>=
cis <- lapply(list(ecoli_m1, ecoli_m1_bc, ecoli_m1_cens), function(x) {
  ci <- confint(x, pargroup = "shift", estimate = TRUE)
  ci <- pnorm(ci[, c(3, 1, 2)] / sqrt(2))
  ci <- formatC(ci, format = "f", digits = 2)
  cbind(ci[, 1], apply(ci[, 2:3], 1, function(x) paste(x, collapse = "---")))
})
tbl <- do.call("cbind", cis)
rownames(tbl) <- c("treatment = medium", "treatment = short",
                   "cattle = present")
add <- list()
add$pos <- list(0)
add$command <- paste(c("& \\multicolumn{2}{c}{Normal} &",
  "\\multicolumn{2}{c}{Non-normal} &",
  "\\multicolumn{2}{c}{Non-normal, censored} \\\\\n",
  "\\cmidrule(lr){2-3} \\cmidrule(lr){4-5}", "\\cmidrule(lr){6-7}",
  rep("& PI & 95\\% CI ", 3), "\\\\\n"), collapse = " ")
print(xtable(tbl, align = "lrrrrrr"), include.colnames = FALSE,
      floating = FALSE, add.to.row = add,
      sanitize.text.function = function(x) x, booktabs = TRUE)
@
\end{table}

\begin{figure}[!ht]
\centering
<>=
par(mfrow = c(1, 3), cex = 0.8, mar = c(4, 4, 2, 1), las = 1)
plot2cis(smooth2ci(diff_smooth(ecoli_m1), PI = TRUE),
         smooth2ci(diff_smooth(ecoli_m1_cens), PI = TRUE),
         mains = paste("treatment =", levels(ecoli$treatment)),
         ylabs = rep("PI", 3))
legend("topright", c("normal", "non-normal, censored"), col = c(1, 2),
       lty = 1, bty = "n", cex = 0.9)
@
\caption{
The comparison of the smooth terms from the original model (normal linear)
and the non-normal (probit link) extension in which censoring is also taken
into account.
}\label{fig:ecoli-cens}
\end{figure}

Because the transformation model approximates the conditional distribution of
the outcome, in theory, we do not even have to take the base 10 logarithm of
the \emph{E. coli} most probable numbers (MPN) on the left-hand side of the
model formula.
\pkg{tramME} should be able to approximate the \emph{most likely
transformation}.
%
<>=
f_nontr <- update(fm1, Surv(ecoli_MPN, event = ecoli_MPN < 2419.6) ~ .)
ecoli_nontr <- BoxCoxME(f_nontr, data = ecoli, log_first = TRUE)
summary(ecoli_nontr)
@
%
Notice that we set \texttt{log\_first = TRUE} in the function call to take
the natural logarithm of the outcome before setting up the Bernstein bases.
This usually helps the approximation in the case of positive right-skewed
outcomes.
With this, we basically estimate the same model as the original, but with the
natural instead of the base-10 logarithm.
Because of this difference, the log-likelihood values are also different, but
the fixed effects and variance components parameter estimates, as well as the
smooth terms, are essentially the same as in the case of the model
\texttt{ecoli\_m1\_cens}.

In summary, after bringing the estimates to the same scale, the results of
the additive mixed-effects model did not change much in this specific example
by switching to the transformation model approach.
The originally applied base 10 logarithm falls very close to the fitted
``most likely transformation'', i.e., taking the logarithm of the outcome was
sufficient to achieve (close) conditional normality.
This can be verified by comparing the baseline transformation functions of
the normal and non-normal models, which can also serve as a visual check on
conditional normality.
Moreover, the number of censored outcomes was relatively small in the sample,
so taking the censoring properly into account did not result in large
differences, either.
However, as the example demonstrated, transformation models are flexible
enough to accommodate these properties of the response of interest
(non-normality and censoring) automatically, without the need to apply ad hoc
transformations or to implement new estimation procedures.
In this sense, \code{tramME::BoxCoxME} provides a simple way of checking the
impact of the more restrictive assumptions hard-wired in \code{gamm4::gamm4}
on model interpretation and of handling censoring properly in the estimation
procedure.

\section{Sea urchin removal experiment}

\citet{Andrew_1993} analyzed the percentage cover of filamentous algae under
four sea urchin removal treatments (Control/33\%/66\%/Removal).
The algae colonization was measured on five quadrats located on several
larger patches, so there is a clear grouped structure in the data.
\citet{Douma_2019} reanalyzed the data as a demonstration of the use of
mixed-effects models for zero-inflated beta regression.
Here, we fit mixed-effects transformation models to the data and compare the
results to zero-inflated mixed-model estimates obtained from \texttt{glmmTMB}
\citep{Brooks_2017,pkg:glmmTMB}.
Figure~\ref{fig:algae-data} presents the empirical cumulative distribution
functions of the outcome under the four treatments.
Note the large number of zeros, especially in the control group.

<>=
andrew <- read.csv("andrew.csv",
                   colClasses = c(QUAD = "factor", PATCH = "factor"))
andrew$TREAT <- factor(andrew$TREAT,
                       labels = c("control", "removal", "0.33", "0.66"))
andrew$TREAT <- factor(andrew$TREAT,
                       levels = c("control", "0.33", "0.66", "removal"))
andrew$pALGAE <- andrew$ALGAE / 100
## summary(andrew)
ecdfs <- lapply(split(andrew, andrew$TREAT), function(x) ecdf(x$pALGAE))
@

\begin{figure}[!ht]
\centering
<>=
x <- seq(0, 1, length.out = 100)
plot(x, ecdfs[[1]](x), type = "s", xlab = "Algae cover proportion",
     ylab = "ECDF", lty = 1, lwd = 2, xlim = c(0, 1), ylim = c(0, 1),
     las = 1, panel.first = grid())
for (ii in 2:4) {
  lines(x, ecdfs[[ii]](x), type = "s", lty = ii, lwd = 2)
}
legend("bottomright", levels(andrew$TREAT), lty = 1:4, lwd = 2, bty = "n",
       cex = 0.9)
@
\caption{Empirical CDFs of the algae cover proportions under the four
treatments.}\label{fig:algae-data}
\end{figure}

First, we fit a zero-inflated beta regression model with random intercepts
for the patches.
The probability of observing zero values is allowed to vary with the
treatment.
%
<>=
urchin_zib <- glmmTMB(pALGAE ~ TREAT + (1 | PATCH), ziformula = ~ TREAT,
                      data = andrew, family = beta_family())
@

As an alternative to the traditional beta regression approach, we estimate a
mixed-effects continuous outcome logistic regression.
%
<>=
urchin_tram <- ColrME(
  Surv(pALGAE, pALGAE > 0, type = "left") ~ TREAT + (1 | PATCH),
  bounds = c(-0.1, 1), support = c(-0.1, 1),
  data = andrew, order = 6)
summary(urchin_tram)
@
%
To allow for a jump in the conditional CDF of the outcome, we expand its
bounds and treat the zero observations as left-censored.
This way, we can place a point mass on zero, i.e., introduce a jump at 0 (see
Figure~\ref{fig:cdf-jump}).
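The coding of the zeros can be inspected directly.
A small illustration of how the left-censored response above is constructed
(the first argument is the observed proportion, the second flags exact
observations):
%
<<left-cens-sketch, eval = FALSE>>=
## Zeros become left-censored ("at most zero"), positive values stay exact
head(with(andrew, Surv(pALGAE, pALGAE > 0, type = "left")))
@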
\begin{figure}[!ht]
\centering
<>=
par(mar = c(4, 4, 1, 1))
layout(mat = matrix(1:3, nrow = 1), widths = c(45, 10, 45))
nd <- model.frame(urchin_tram)[rep(21, 100), ]
nd[[1]] <- seq(-0.1, 1, length.out = 100)
ccdf_e <- predict(urchin_tram, newdata = nd, type = "distribution",
                  ranef = "zero")
plot(nd[[1]], ccdf_e, type = "l", ylim = c(0, 1), xlim = c(-0.1, 1),
     panel.first = grid(), xlab = "y", ylab = expression(F[Y] * "(y)"))
idx <- which(nd[[1]] < 0)
lines(nd[[1]][idx], ccdf_e[idx], col = 2, lwd = 3)
par(mar = c(1, 1, 1, 1))
plot(0:1, 0:1, type = "n", yaxt = "n", xaxt = "n", xlab = "", ylab = "",
     bty = "n")
arrows(x0 = 0, x1 = 1, y0 = 0.5, length = 0.1, lwd = 3)
nd[[1]] <- seq(0, 1, length.out = 100)
ccdf <- predict(urchin_tram, newdata = nd, type = "distribution",
                ranef = "zero")
par(mar = c(4, 4, 1, 1))
plot(nd[[1]], ccdf, type = "l", ylim = c(0, 1), xlim = c(-0.1, 1), lwd = 2,
     panel.first = grid(), xlab = "y", ylab = expression(F[Y] * "(y)"))
points(0, 0, pch = 21, cex = 1, lwd = 2)
points(0, ccdf[1], pch = 20, cex = 1, lwd = 2)
segments(x0 = -0.1, x1 = -0.01, y0 = 0, lwd = 2)
@
\caption{Visual demonstration of how a discrete jump is introduced in the CDF
by extending the support and treating the edge cases as
censored.}\label{fig:cdf-jump}
\end{figure}

Because the zero-inflated beta model is a mixture of two models, the
interpretation of its results is cumbersome.
It is not clear which parameters, or combinations of parameters, one needs to
inspect to contrast the effects of the various treatments.
Moreover, extra steps are needed to calculate the marginal effects of the
covariates.
In contrast, the mixed-effects transformation model only contains a single
set of fixed effects parameters and their interpretation is straightforward:
For example, the odds of observing higher proportions of algae cover under
the 33\% removal treatment are about $\exp(-\widehat{\beta}_{0.33}) = $
\Sexpr{formatC(exp(-coef(urchin_tram)[1]), format = 'f', digits = 2)} times
higher than in the control group.

To assess the fits of the two models, we can marginalize the conditional
distributions by integrating over the random effects numerically, and compare
against the ECDFs.
As Figure~\ref{fig:algae-mcdf1} shows, both models overestimate the
dispersion in the control group.
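Returning to the single-parameter interpretation above, an interval estimate
on the odds scale can be obtained with the same sign flip.
A sketch, assuming the first shift coefficient is named \code{TREAT0.33} and
noting that the transformation reverses the roles of the interval bounds:
%
<<urchin-or-sketch, eval = FALSE>>=
## Odds ratio of higher algae cover, 33% removal vs control, with 95% CI;
## exp(-x) is decreasing, so lower and upper bounds swap roles
exp(-confint(urchin_tram, parm = "TREAT0.33", estimate = TRUE))
@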
<>= ## Numerical integration to get an estimate of the marginal distribution marginalize.zib.glmmTMB <- function(model, data, type = c("distribution", "density"), lower = -Inf, upper = Inf) { type <- match.arg(type) ## for a single data point joint <- function(re, nd, mod, type) { mu <- plogis(predict(mod, newdata = nd, type = "link", re.form = NA) + re) mu <- ifelse(mu == 0, .Machine$double.eps, mu) mu <- ifelse(mu == 1, 1 - .Machine$double.eps, mu) sig <- predict(mod, newdata = nd, type = "disp") nu <- predict(mod, newdata = nd, type = "zprob") out <- sapply(mu, function(m) { switch(type, distribution = gamlss.dist::pBEZI(nd[[1]], mu = m, sigma = sig, nu = nu), density = gamlss.dist::dBEZI(nd[[1]], mu = m, sigma = sig, nu = nu), stop("Unknown function!")) }) out * dnorm(re, mean = 0, sd = sqrt(VarCorr(mod)$cond[[1]][1])) } ## integrate out the random effects row by row res <- parallel::mclapply(split(data, seq(nrow(data))), function(nd) { out <- integrate(joint, lower = lower, upper = upper, nd = nd, mod = model, type = type) if (out$message == "OK") return(out$value) return(NA) }, mc.cores = 8) unlist(res) } marginalize.tramME <- function(model, data, type = "survivor", add = 0, lower = -Inf, upper = Inf) { ## fun(y|x,g) * density(g) joint <- function(re, nd, mod, type) { nd <- nd[rep(1, length(re)), ] rv <- variable.names(mod, "response") nd[[rv]] <- nd[[rv]] + add nd[[variable.names(mod, "grouping")]] <- seq(nrow(nd)) ## to take vector-valued REs pr <- predict(mod, newdata = nd, ranef = re, type = type) * dnorm(re, 0, sd = sqrt(varcov(mod)[[1]][1, 1])) c(pr) } ## integrate out the random effects row by row res <- parallel::mclapply(split(data, seq(nrow(data))), function(nd) { out <- integrate(joint, lower = lower, upper = upper, nd = nd, mod = model, type = type) if (out$message == "OK") return(out$value) return(NA) }, mc.cores = 8) unlist(res) } nd <- expand.grid(pALGAE = seq(0, 1, length.out = 100), TREAT = factor(levels(andrew$TREAT), levels = c("control", "0.33", "0.66", "removal")), PATCH = andrew$PATCH[1], KEEP.OUT.ATTRS = FALSE) colnames(nd)[1] <- variable.names(urchin_tram, "response") if (!file.exists("urchin.rda")) { ## estimate the glmmTMB model urchin_zib <- glmmTMB(pALGAE ~ TREAT + (1 | PATCH), ziformula = ~ TREAT, data = andrew, family = beta_family()) ## save results in a single df urchin_res <- nd colnames(urchin_res)[1] <- "pALGAE" urchin_res$tramME_shift <- marginalize.tramME(urchin_tram, data = nd, type = "distribution") urchin_res$zib <- marginalize.zib.glmmTMB(urchin_zib, data = nd, type = "distribution") urchin_zib_ll <- data.frame(ll = logLik(urchin_zib), np = length(urchin_zib$obj$par)) } else{ load("urchin.rda") } @ \begin{figure}[!ht] \centering <>= multiplot <- function(dfs, yn, xn = colnames(dfs[[1]])[1], ecdfs = NULL, cols = 1:(length(yn)+!is.null(ecdfs)), ltys = rep(1, length(yn)+!is.null(ecdfs)), lwds = rep(1, length(yn)+!is.null(ecdfs)), mains = NULL, ...) { if (!is.null(ecdfs)) stopifnot(length(dfs) == length(ecdfs)) for (i in seq_along(dfs)) { col <- if (is.null(ecdfs)) cols else cols[-length(cols)] lty <- if (is.null(ecdfs)) ltys else ltys[-length(ltys)] lwd <- if (is.null(ecdfs)) ltys else lwds[-length(lwds)] main <- if (is.null(mains)) names(dfs)[i] else mains[i] matplot(dfs[[i]][[xn]], dfs[[i]][yn], type = "l", col = col, lty = lty, lwd = lwd, panel.first = grid(), ..., main = main) if (!is.null(ecdfs)) { lines(dfs[[i]][[xn]], ecdfs[[i]](dfs[[i]][[xn]]), type = "s", lty = rev(ltys)[1], col = rev(cols)[1], lwd = rev(lwds)[1], ...) 
    }
  }
}
cols <- c("#E16A86", "#00AD9A")
layout(mat = matrix(c(1:4, rep(5, 4)), nrow = 2, byrow = TRUE),
       heights = c(7, 1))
par(mar = c(4, 4, 3, 1), las = 1)
multiplot(split(urchin_res, urchin_res$TREAT), yn = c("tramME_shift", "zib"),
          ecdfs = ecdfs, mains = paste("treatment =", levels(urchin_res$TREAT)),
          cols = c(cols, 1), lwds = c(rep(2, 2), 1),
          xlab = "pALGAE", ylab = "prob", xlim = c(0, 1))
par(mar = c(0, 0, 0, 0))
plot.new()
legend("center", c("Linear transformation model", "Zero-inflated beta",
                   "ECDF"),
       col = c(cols, 1), lty = 1, bty = "n", lwd = c(rep(2, 2), 1),
       horiz = TRUE)
@
\caption{Fitted marginal distributions of algae cover proportion from the
zero-inflated beta regression and the mixed-effects transformation model,
respectively.
The step functions show the empirical cumulative distribution functions in
the four treatment groups.}\label{fig:algae-mcdf1}
\end{figure}

Systematic differences in the outcome variability across the treatment groups
occur in many situations \citep{Douma_2019}.
By modeling the dispersion separately, we can incorporate such differences
into the beta regression model.
%
<>=
urchin_zib_disp <- glmmTMB(pALGAE ~ TREAT + (1 | PATCH), ziformula = ~ TREAT,
                           dispformula = ~ TREAT, data = andrew,
                           family = beta_family())
@

In the mixed-effects linear transformation model, we stratify by treatment
group to allow for separate transformation functions.
%
<>=
urchin_tram_strat <- ColrME(
  Surv(pALGAE, pALGAE > 0, type = "left") | 0 + TREAT ~ 1 + (1 | PATCH),
  bounds = c(-0.1, 1), support = c(-0.1, 1),
  data = andrew, order = 6,
  control = optim_control(iter.max = 1e3, eval.max = 1e3, rel.tol = 1e-9))
summary(urchin_tram_strat)
@

<>=
if (!("tramME_strat" %in% colnames(urchin_res))) {
  ## fit the glmm
  urchin_zib_disp <- glmmTMB(pALGAE ~ TREAT + (1 | PATCH),
                             ziformula = ~ TREAT, dispformula = ~ TREAT,
                             data = andrew, family = beta_family())
  ## marginalize
  urchin_res$tramME_strat <- marginalize.tramME(urchin_tram_strat, data = nd,
                                                type = "distribution")
  urchin_res$zib_disp <- marginalize.zib.glmmTMB(urchin_zib_disp, data = nd,
                                                 type = "distribution")
}
if (nrow(urchin_zib_ll) == 1) {
  urchin_zib_ll <- rbind(urchin_zib_ll,
                         c(logLik(urchin_zib_disp),
                           length(urchin_zib_disp$obj$par)))
}
if (!file.exists("urchin.rda")) {
  save(urchin_res, urchin_zib_ll, file = "urchin.rda")
}
@

As Figure~\ref{fig:algae-mcdf2} illustrates, the two models fit the data much
better.
However, the cost of this flexibility is that we cannot reduce the group
comparisons to inference on a small set of parameters anymore.

\begin{figure}[!ht]
\centering
<>=
cols <- c("#E16A86", "#00AD9A")
layout(mat = matrix(c(1:4, rep(5, 4)), nrow = 2, byrow = TRUE),
       heights = c(7, 1))
par(mar = c(4, 4, 3, 1), las = 1)
multiplot(split(urchin_res, urchin_res$TREAT),
          yn = c("tramME_strat", "zib_disp"), ecdfs = ecdfs,
          mains = paste("treatment =", levels(urchin_res$TREAT)),
          cols = c(cols, 1), lwds = c(rep(2, 2), 1),
          xlab = "pALGAE", ylab = "prob", xlim = c(0, 1))
par(mar = c(0, 0, 0, 0))
plot.new()
legend("center", c("Stratified linear transformation model",
                   "Zero-inflated beta with dispersion model", "ECDF"),
       col = c(cols, 1), lty = 1, bty = "n", lwd = c(rep(2, 2), 1),
       horiz = TRUE)
@
\caption{Fitted marginal distributions of algae cover proportion from the
zero-inflated beta regression with dispersion model and the stratified
mixed-effects transformation model, respectively.
The step functions show the empirical cumulative distribution functions in
the four treatment groups.}\label{fig:algae-mcdf2}
\end{figure}

Figures~\ref{fig:algae-mcdf1} and~\ref{fig:algae-mcdf2} demonstrate the
flexibility of the distribution-free approach of transformation models
compared to the parametric alternative.
This is also reflected in the log-likelihood values (Table~\ref{tbl:ll-algae}).

\begin{table}[!ht]
\centering
\caption{Log-likelihood values of the four model specifications for the sea
urchin removal experiment.
}\label{tbl:ll-algae}
<>=
lls <- data.frame(c(urchin_zib_ll$ll[1], urchin_zib_ll$ll[2],
                    logLik(urchin_tram), logLik(urchin_tram_strat)),
                  c(urchin_zib_ll$np[1], urchin_zib_ll$np[2],
                    length(urchin_tram$tmb_obj$par),
                    length(urchin_tram_strat$tmb_obj$par)))
rownames(lls) <- c("Zero-inflated beta without dispersion model",
                   "Zero-inflated beta with dispersion model",
                   "Linear transformation model",
                   "Stratified linear transformation model")
colnames(lls) <- c("$\\log\\mathcal{L}$", "Number of parameters")
print(xtable(lls, align = "lrr", digits = c(0, 2, 0)),
      sanitize.text.function = function(x) x, booktabs = TRUE,
      floating = FALSE)
@
\end{table}

In summary, although the shift-scale beta regression model is not a special
case of a transformation model and one thus cannot expect identical results
with a specific parameterization of \code{tramME::ColrME}, the simpler
transformation model (with one instead of two linear predictors) produced a
better model fit (when comparing the in-sample log-likelihoods).

\section{Mosquito control trial}

<>=
AGO <- read.csv("Juarez2021.csv")
factors <- c("Community", "HouseID", "Year", "Income", "Placement")
AGO[factors] <- lapply(AGO[factors], factor)
AGO$AEAfemale <- as.integer(AGO$AEAfemale)
@

\citet{Juarez_2021} presented the results of a cluster randomized crossover
trial that assessed the efficacy of the Autocidal Gravid Ovitrap (AGO) as a
tool against the mosquito species \emph{Aedes aegypti}.
The outcome of interest was the number of female mosquitoes collected on glue
boards that were placed either inside or outside of the selected houses in
various neighborhoods.
Within-year patterns in mosquito counts as well as the coverage of the
treatment in different areas were modeled with non-linear smooths, while
unobserved household- and community-level effects were captured by nested
random effects.
The original article presented the results of a conditional Poisson and a
negative binomial model.
We reproduce these results with \pkg{gamm4}, and also estimate a
mixed-effects additive transformation model for count data with an ``expit''
inverse link function.
A detailed exposition of count transformation models is given by
\citet{Siegfried_Hothorn_2020}.
For fitting such a model, we will use the following custom-made
\texttt{CotramME} model class implementing the likelihood for count data via
interval censoring \citep{Siegfried_Hothorn_2020}, which is currently not
part of the \pkg{tramME} package.
%
<>=
## Additive count transformation model
## See ?cotram::cotram for the documentation
CotramME <- function(formula, data,
                     method = c("logit", "cloglog", "loglog", "probit"),
                     log_first = TRUE, plus_one = log_first, prob = 0.9, ...)
{
  method <- match.arg(method)
  rv <- all.vars(formula)[1]
  stopifnot(is.integer(data[[rv]]), all(data[[rv]] >= 0))
  data[[rv]] <- data[[rv]] + as.integer(plus_one)
  sup <- c(-0.5 + log_first, quantile(data[[rv]], prob = prob))
  bou <- c(-0.9 + log_first, Inf)
  data[[rv]] <- as.Surv(R(data[[rv]], bounds = bou))
  fc <- match.call()
  fc[[1L]] <- switch(method, logit = quote(ColrME), cloglog = quote(CoxphME),
                     loglog = quote(LehmannME), probit = quote(BoxCoxME))
  fc$method <- NULL
  fc$plus_one <- NULL
  fc$prob <- NULL
  fc$log_first <- log_first
  fc$bounds <- bou
  fc$support <- sup
  fc$data <- data
  out <- eval(fc, parent.frame())
  out$call$data <- match.call()$data
  class(out) <- c("CotramME", class(out))
  out
}
mosquito_tram <- CotramME(AEAfemale ~ Year + Income*Placement + s(Week) +
                            s(CovRate200) + (1|HouseID) + (1|Community),
                          offset = -log(daystrapping), data = AGO,
                          method = "logit", order = 5, log_first = TRUE,
                          prob = 0.9)
@

<<>>=
if (file.exists("mosquito_models.rda")) {
  load("mosquito_models.rda")
} else {
  ## GAMMs
  mosquito_pois <- gamm4(AEAfemale ~ offset(log(daystrapping)) + Year +
                           Income*Placement + s(Week) + s(CovRate200),
                         random = ~ (1|HouseID) + (1|Community),
                         data = AGO, family = poisson)
  mosquito_nb <- gamm4(AEAfemale ~ offset(log(daystrapping)) + Year +
                         Income*Placement + s(Week) + s(CovRate200),
                       random = ~ (1|HouseID) + (1|Community),
                       data = AGO, family = negative.binomial(1))
  ## additive count transformation model
  CotramME <- function(formula, data,
                       method = c("logit", "cloglog", "loglog", "probit"),
                       log_first = TRUE, plus_one = log_first, prob = 0.9,
                       ...) {
    method <- match.arg(method)
    rv <- all.vars(formula)[1]
    stopifnot(is.integer(data[[rv]]), all(data[[rv]] >= 0))
    data[[rv]] <- data[[rv]] + as.integer(plus_one)
    sup <- c(-0.5 + log_first, quantile(data[[rv]], prob = prob))
    bou <- c(-0.9 + log_first, Inf)
    data[[rv]] <- as.Surv(R(data[[rv]], bounds = bou))
    fc <- match.call()
    fc[[1L]] <- switch(method, logit = quote(ColrME),
                       cloglog = quote(CoxphME), loglog = quote(LehmannME),
                       probit = quote(BoxCoxME))
    fc$method <- NULL
    fc$plus_one <- NULL
    fc$prob <- NULL
    fc$log_first <- log_first
    fc$bounds <- bou
    fc$support <- sup
    fc$data <- data
    out <- eval(fc, parent.frame())
    out$call$data <- match.call()$data
    class(out) <- c("CotramME", class(out))
    out
  }
  mosquito_tram <- CotramME(AEAfemale ~ Year + Income*Placement + s(Week) +
                              s(CovRate200) + (1|HouseID) + (1|Community),
                            offset = -log(daystrapping), data = AGO,
                            method = "logit", order = 5, log_first = TRUE,
                            prob = 0.9)
  stopifnot(mosquito_tram$opt$convergence == 0)
}
@

Table~\ref{tbl:mosquito-ll} compares the log-likelihood values of the three
model versions. In terms of in-sample model fit, as measured by the
log-likelihood value, both the negative binomial and the transformation model
perform much better than the Poisson GAMM. The results suggest a slight
further improvement in the model fit when we relax the conditional
distribution assumption of the negative binomial GAMM and follow the
distribution-free transformation model approach.
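Before turning to the comparison, it is worth spelling out the key idea behind
\code{CotramME}: a count $y$ enters the likelihood as the interval-censored
observation $(y - 1, y]$, so that its likelihood contribution is
$F(h(y)) - F(h(y - 1))$ \citep{Siegfried_Hothorn_2020}. The following sketch
(not evaluated, with made-up counts and the bounds implied by
\code{log\_first = TRUE}) illustrates the representation that
\code{as.Surv(R(...))} creates inside \code{CotramME}:
%
<<eval=FALSE>>=
## Illustration only: counts represented as interval-censored observations
y <- as.integer(c(0, 2, 5)) + 1L     ## the plus_one shift used by CotramME
as.Surv(R(y, bounds = c(0.1, Inf)))  ## intervals (y - 1, y], left ends
                                     ## clipped at the lower bound
@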
\begin{table}[!ht]
\centering
\caption{Log-likelihood values of the fitted Poisson and negative binomial
  GAMMs reproduced from \protect\citet{Juarez_2021} along with the
  log-likelihood of an additive transformation model for count
  data.}\label{tbl:mosquito-ll}
<<>>=
if (!file.exists("mosquito_models.rda")) {
  ll_mosquito <- c("Poisson GAMM" = logLik(mosquito_pois$mer),
                   "Negative binomial GAMM" = logLik(mosquito_nb$mer),
                   "Additive count transformation model" =
                     logLik(mosquito_tram))
  ll_mosquito <- data.frame(ll_mosquito)
  names(ll_mosquito) <- "Log-likelihood"
}
print(xtable(ll_mosquito, align = "lr"), floating = FALSE, booktabs = TRUE)
@
\end{table}

We will now concentrate on comparing the estimates from the negative binomial
and the count transformation models. Note that the scales on which the
parameters are interpreted are different in the two models: while in the
negative binomial model the parametric and smooth terms affect the log of the
conditional mean of the outcome, in the transformation model with ``logit''
link (i.e., ``expit'' inverse link), they are interpreted on the log-odds
scale. Unlike in the example application of Section~\ref{sec:ecoli}, we cannot
easily transform the negative binomial parameters to the probability scale.
Although the magnitudes of the effect estimates of the two models are not
directly comparable, their directions, significance and the general shapes of
the smooths can be contrasted.

<<>>=
if (!file.exists("mosquito_models.rda")) {
  sums_mosquito <- list(
    nb = summary(mosquito_nb$gam),
    tram = summary(mosquito_tram)
  )
}
@

Figure~\ref{fig:mosquito-smooth} compares the smooth estimates of the GAMM
from \pkg{gamm4} and the transformation model from \pkg{tramME}. Although the
within-year time patterns (\texttt{s(Week)}) from the two models are almost
identical (on different scales), the smooth estimates of the coverage rate
(\texttt{s(CovRate200)}) differ markedly. The general shapes of the smooths
are similar, but the negative binomial GAMM penalizes this term more strongly,
which is also reflected in the EDFs: \Sexpr{round(sums_mosquito$nb$edf[2], 2)}
and \Sexpr{round(sums_mosquito$tram$smooth[2, 1], 2)} for the negative
binomial and count transformation models, respectively.
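These EDF values can be read off the two model summaries directly; a minimal
sketch (not evaluated, with component names following the summary objects used
above):
%
<<eval=FALSE>>=
## Effective degrees of freedom of the smooth terms
summary(mosquito_nb$gam)$edf   ## EDFs of the mgcv smooths in the GAMM
summary(mosquito_tram)$smooth  ## EDF column for the tramME smooth terms
@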
\begin{figure}[!ht]
\centering
<<>>=
if (!file.exists("mosquito_models.rda")) {
  nd <- AGO[rep(1, 100), ]
  nd$Week <- seq(min(AGO$Week), max(AGO$Week), length.out = 100)
  nd$CovRate200 <- seq(min(AGO$CovRate200), max(AGO$CovRate200),
                       length.out = 100)
  pr <- predict(mosquito_nb$gam, type = "terms",
                terms = c("s(Week)", "s(CovRate200)"), newdata = nd,
                se.fit = TRUE)
  sm_mosquito_nb <- lapply(colnames(pr[[1]]), function(n) {
    ci <- pr$fit[, n] + qnorm(0.975) * pr$se.fit[, n] %o% c(-1, 1)
    out <- data.frame(cbind(pr$fit[, n], ci))
    colnames(out) <- c("fit", "lwr", "upr")
    out
  })
  names(sm_mosquito_nb) <- colnames(pr[[1]])
  sm_mosquito_nb[[1]]$x <- nd$Week
  sm_mosquito_nb[[2]]$x <- nd$CovRate200
  sm_mosquito_tram <- smooth_terms(mosquito_tram)
}
plotsm <- function(sm, xlab, ylab) {
  plot(sm$x, sm$fit, type = "l", xlim = range(sm$x),
       ylim = range(sm[, c("lwr", "upr")]), xlab = xlab, ylab = ylab,
       panel.first = grid())
  polygon(c(sm$x, rev(sm$x)), c(sm$lwr, rev(sm$upr)), border = NA,
          col = grey(0.5, 0.25))
}
par(mfrow = c(2, 2), mar = c(4, 4, 3, 1), cex = 0.8, las = 1)
plotsm(sm_mosquito_nb[[1]], "Week", "s(Week)")
plotsm(sm_mosquito_nb[[2]], "CovRate200", "s(CovRate200)")
mtext("Negative binomial model", side = 3, line = -2, outer = TRUE)
sm <- sm_mosquito_tram
for (i in seq_along(sm_mosquito_tram)) {
  sm[[i]][, 2] <- -sm[[i]][, 2]
  plot(sm[i], panel.first = grid())
}
mtext("Count transformation model", side = 3, line = -23, outer = TRUE)
@
\caption{Smooth terms from the negative binomial and transformation models of
  the \emph{A. aegypti} counts. The dashed lines and the grey areas denote the
  95\% confidence intervals.}\label{fig:mosquito-smooth}
\end{figure}

Because the parametric and smooth terms of the two models are defined on
different scales, the magnitudes of the effect estimates are not directly
comparable. As Table~\ref{tbl:mosquito-fe} shows, the directions of the
effects match and neither model finds evidence that the main effect of middle
income is different from zero.

\begin{table}
\centering
\caption{Point estimates and 95\% confidence intervals of the parametric
  fixed-effects terms from the negative binomial and count transformation
  models of the mosquito control data by \protect\citet{Juarez_2021}. Note
  that the scales of the parameters are different and the effect sizes are not
  directly comparable.
}\label{tbl:mosquito-fe}
<<>>=
if (!file.exists("mosquito_models.rda")) {
  formatCI <- function(x, digits = 2) {
    fx <- formatC(x, format = "f", digits = digits)
    fx <- matrix(paste0("$", ifelse(c(x) > 0, paste0("\\phantom{-}", fx), fx),
                        "$"), ncol = 2)
    apply(fx, 1, paste, collapse = " ---")
  }
  b_nb <- sums_mosquito$nb$p.table[-1, 1]
  se_nb <- sums_mosquito$nb$p.table[-1, 2]
  ci_nb <- b_nb + qnorm(0.975) * se_nb %o% c(-1, 1)
  ci_nb <- formatCI(ci_nb)
  b_tr <- -sums_mosquito$tram$coef[, 1]
  se_tr <- sums_mosquito$tram$coef[, 2]
  ci_tr <- b_tr + qnorm(0.975) * se_tr %o% c(-1, 1)
  ci_tr <- formatCI(ci_tr)
  ci_mosquito <- data.frame(paste0("$", formatC(b_nb, format = "f",
                                                digits = 2), "$"),
                            ci_nb,
                            paste0("$", formatC(b_tr, format = "f",
                                                digits = 2), "$"),
                            ci_tr)
  rownames(ci_mosquito) <- c("Year = 2018", "Income = middle",
                             "Placement = out",
                             "Income = middle \\& Placement = out")
  save(sm_mosquito_nb, sm_mosquito_tram, ll_mosquito, ci_mosquito,
       sums_mosquito, file = "mosquito_models.rda")
}
add <- list()
add$pos <- list(0)
add$command <- paste(c("& \\multicolumn{2}{c}{Negative binomial} &",
                       "\\multicolumn{2}{c}{Count transformation} \\\\\n",
                       "\\cmidrule(lr){2-3} \\cmidrule(lr){4-5}",
                       rep("& $\\widehat\\beta$ & 95\\% CI ", 2), "\\\\\n"),
                     collapse = " ")
print(xtable(ci_mosquito, align = c("@{}l", rep("r", 3), "r@{}")),
      include.colnames = FALSE, floating = FALSE, add.to.row = add,
      sanitize.text.function = function(x) x, booktabs = TRUE)
@
\end{table}

Again, the models compared in this example are not nested, and it is therefore
hard to compare them directly. The transformation model leads to a similar
model interpretation as the model based on the negative binomial distribution.
Model uncertainty was larger in the transformation model, at least for the
nonlinear effect of \code{CovRate200}, and thus one might wonder whether the
stricter distributional assumption led to an overconfident model
interpretation.

\section{Simulation experiments}\label{sec:sim}

The goals of the following simulation experiments are threefold.
%
\begin{enumerate*}
\item First, we want to evaluate the loss of model performance when using the
  flexible, distribution-free mixed-effects additive transformation model
  approach in a scenario where a simple normal linear additive mixed model is
  appropriate.
\item Second, we are interested in comparing the performances of the normal
  and non-normal models under a highly non-normal data-generating process.
\item Finally, we investigate the performance of the non-normal
  transformation model in the case of interval-censored data.
\end{enumerate*}

We consider a normal and a non-normal data-generating process for simulating
grouped data with non-linear covariate effects. The conditional distributions
corresponding to both of these can be written in the general transformation
model form:
%
\begin{align*}
\IP(Y\leq y\mid x_{1}, x_{2}, \gamma_{j}) &=
  \Phi\left(h(y) - \beta x_{1} - f(x_{2}) - \gamma_{j}\right) \qquad
  \gamma_{\left\{1:J\right\}}\overset{\text{i.i.d.}}{\sim}\cN(0,\sigma^{2}),
\end{align*}
%
where $\Phi$ denotes the CDF of the standard Gaussian distribution,
$\beta x_{1}$ is the parametric fixed-effect term, $f(x_{2})$ is a non-linear
term and $\gamma_{j}$ is the random intercept of group $j=1\dots J$ with
standard deviation $\sigma$. The data are simulated by numerically inverting
the conditional distribution function.
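Concretely, with $U$ drawn from the standard uniform distribution, inverting
the model equation yields
%
\begin{align*}
Y &= h^{-1}\left(\Phi^{-1}(U) + \beta x_{1} + f(x_{2}) + \gamma_{j}\right),
\end{align*}
%
which is exactly the recipe the \code{gen\_smpl()} function below implements.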
%
<<>>=
##' @param n Numeric vector, number of observations in each group
##' @param beta Numeric, effect size
##' @param sfun Function with a numeric argument, non-linear smooth shift term
##' @param sigma Numeric, SD of random intercepts
##' @param hinv Function with a numeric argument, inverse transformation function
##' @param link Function with a single numeric argument on [0, 1], link function
##' @param scale Numeric, optional scaling constant on the transformation scale
##' @param seed Seed for the random number generator
##' @param two_sets Logical; generate both an estimation and a test sample
gen_smpl <- function(n, beta, sfun, sigma, hinv, link, scale = 1, seed = NULL,
                     two_sets = TRUE) {
  ## -- setting up the seed
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  if (is.null(seed))
    RNGstate <- get(".Random.seed", envir = .GlobalEnv)
  else {
    R.seed <- get(".Random.seed", envir = .GlobalEnv)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
  }
  if (two_sets) n <- rep(n, 2)
  x1 <- runif(sum(n))
  x2 <- runif(sum(n))
  gr <- factor(rep(seq_along(n), n))
  re <- rep(rnorm(length(n), mean = 0, sd = sigma), n)
  lp <- x1 * beta + sfun(x2) + re
  y <- hinv((link(runif(sum(n))) + lp) * scale)
  if (two_sets) {
    n <- sum(n) / 2
    out <- list(est = data.frame(x1 = x1[1:n], x2 = x2[1:n], gr = gr[1:n],
                                 y = y[1:n]),
                test = data.frame(x1 = tail(x1, n), x2 = tail(x2, n),
                                  gr = tail(gr, n), y = tail(y, n)))
  } else {
    out <- data.frame(x1 = x1, x2 = x2, gr = gr, y = y)
  }
  out
}
@

For the baseline transformations and the non-linear shift term we set the
following functional forms:
%
\begin{align*}
h_{1}(y) &= y, \qquad
h_{2}(y) = \Phi^{-1}\left(F_{\chi^{2}_{3}}(y)\right), \qquad
f(x) = \xi \sin(\pi x),
\end{align*}
%
with $\Phi^{-1}(\cdot)$ the quantile function of the standard normal
distribution and $F_{\chi^{2}_{3}}(\cdot)$ the distribution function of the
$\chi^{2}$-distribution with 3 degrees of freedom. By using the linear
function $h_{1}(y)$ as the transformation function, we assume a normal
additive mixed model. In contrast, setting $h_{2}(y)$ introduces
non-normality. In this case, the conditional distribution implied by the
data-generating mechanism is $\chi^{2}$ ($\text{df}=3$) when the linear
predictor is zero. In the simulations, we use $\beta=\sqrt{2}\Phi^{-1}(0.7)$
so that the effect size of the parametric fixed-effect term is $0.7$ on the
probabilistic index scale in both the normal and the non-normal case (see
Section~\ref{sec:ecoli} for details). Values of $x_{1}$ and $x_{2}$ are drawn
from the uniform distribution on the unit interval for each observation
separately. The parameters $\sigma$ and $\xi$ are set to values that result
in equal variances for the parametric ($\beta x_{1}$), the non-linear
($f(x_{2})$) and the random-effects terms of the linear predictor on the
transformation scale.
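As a quick numerical sanity check (not part of the original analysis), the
implied probabilistic index and the equal-variance calibration can be
verified by simulation; the constants below mirror those defined in the next
chunk:
%
<<eval=FALSE>>=
## Not evaluated: check the implied PI and the variance calibration
b  <- qnorm(0.7) * sqrt(2)                      ## beta
pnorm(b / sqrt(2))                              ## probabilistic index: 0.7
xi <- 2 * b * sqrt(1/12) / sqrt(2 - 16 / pi^2)  ## scale of the sine term
x  <- runif(1e6)
var(b * x)             ## parametric term, theoretical value b^2 / 12
var(xi * sin(pi * x))  ## smooth term, calibrated to b^2 / 12
(sqrt(1/12) * b)^2     ## random-intercept variance, also b^2 / 12
@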
<<>>=
## ==== Simulation inputs
b <- qnorm(0.7) * sqrt(2) ## PI = 0.7, SD of the parametric part is
                          ## sqrt(1/12) * b = 0.2141 on the LP scale
sf <- function(x) 2 * b * sqrt(1/12) * sin(x * pi) / sqrt(2 - 16 / pi^2)
                          ## same SD as the parametric term on the LP scale
sig <- sqrt(1/12) * b     ## same RE SD on the LP scale
hi1 <- function(x) x
hi2 <- function(x) qchisq(pnorm(x), df = 3)
lnk <- qnorm
## ==== Helper function to extract results
get_res <- function(res, what, which = NULL, simplify = TRUE) {
  out <- lapply(res, function(x) {
    if (!is.null(which)) x <- x[which]
    sapply(x, function(xx) {
      if (length(xx) == 1 && is.na(xx)) NA
      else {
        if (is.function(what)) return(what(xx))
        xx[[what]]
      }
    })
  })
  if (simplify) {
    out <- do.call("rbind", out)
    if (!is.function(what))
      colnames(out) <- paste0(names(res[[1]]), "_", what)
  }
  out
}
@

For the third question, to generate outcomes that are censored to intervals
of a given length from continuous data, we define the following function:
%
<<>>=
make_intcens <- function(x, length = 1) {
  Surv(floor(x / length) * length, ceiling(x / length) * length,
       type = "interval2")
}
@

In all scenarios, we simulate moderate-sized datasets with 100
subjects/groups ($J=100$) and four observations per group. We repeat all
simulations 500 times. Our primary focus in these simulation experiments is
on inference about the fixed-effects parameter $\beta$ and the general
predictive performance of the model variants evaluated through the
out-of-sample log-likelihood. For the out-of-sample log-likelihood
calculations, we integrate over the random intercepts in the likelihood
(i.e., the exponential of Equation~\ref{eq:loglik}) of a new set of
observations and evaluate it at the fitted values of the remaining parameters
(the coefficients corresponding to the baseline transformation, the fixed
effect and the smooth term, as well as the variance component and smoothing
parameters).

\subsection{Normal data-generating process}

To investigate our first question, we draw samples from the (conditionally)
normal additive mixed model (i.e., $h(y)=h_{1}(y)=y$) and fit a mixed-effects
additive transformation model (with standard normal inverse link), as well as
the correctly specified normal additive mixed model, to it.
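In the simulation chunks below, this out-of-sample evaluation boils down to a
single call per fitted model. A schematic sketch, where \code{m} and
\code{test} are placeholders for a fitted model and the evaluation sample:
%
<<eval=FALSE>>=
## Not evaluated: out-of-sample log-likelihood of a fitted tramME model;
## the random intercepts are integrated out, all remaining parameters are
## held at their fitted values
logLik(m, newdata = test, type = "fix_smooth")
@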
<<>>=
if (!file.exists("sim1.rda")) {
  seeds <- 301:800
  system.time({
    sims1 <- parallel::mclapply(seeds, function(s) {
      smpl <- gen_smpl(rep(4, 100), beta = b, sfun = sf, sigma = sig,
                       hinv = hi1, link = lnk, seed = s, two_sets = TRUE)
      m1 <- LmME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m1$opt$convergence == 0) {
        lmm <- list(ll = logLik(m1),
                    lloos = logLik(m1, newdata = smpl$test,
                                   type = "fix_smooth"),
                    beta = coef(m1), sigma = sqrt(varcov(m1)$gr[1,1]),
                    theta = m1$param$beta[attr(m1$param$beta, "type") == "bl"],
                    beta_se = sqrt(vcov(m1, parm = "x1")[1,1]))
      } else {
        lmm <- NA
      }
      m2 <- BoxCoxME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m2$opt$convergence == 0) {
        bcm <- list(ll = logLik(m2),
                    lloos = logLik(m2, newdata = smpl$test,
                                   type = "fix_smooth"),
                    beta = coef(m2), sigma = sqrt(varcov(m2)$gr[1,1]),
                    theta = m2$param$beta[attr(m2$param$beta, "type") == "bl"],
                    beta_se = sqrt(vcov(m2, parm = "x1")[1,1]),
                    intercept = mean(sf(smpl$est$x2)))
      } else {
        bcm <- NA
      }
      list(LmME = lmm, BoxCoxME = bcm)
    }, mc.cores = 6)
  })
  save(sims1, file = "sim1.rda")
} else {
  load("sim1.rda")
}
## -- non-convergence
rowSums(sapply(sims1, function(x)
  sapply(x, function(xx) c(length(xx) == 1 && is.na(xx)))))
@

\begin{figure}
\centering
<<>>=
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
boxplot(pnorm(get_res(sims1, "beta") / sqrt(2)) - 0.7, ylab = "PI",
        names = c("Normal", "Non-normal"), boxwex = 0.5)
grid(nx = NA, ny = NULL)
abline(h = 0, lwd = 2, lty = 2, col = 2)
@
\caption{Bias of the coefficient of the parametric fixed term in the case of
  the conditionally normal data-generating process. The results are presented
  on the probabilistic index scale. ``Normal'' corresponds to the results
  from the normal additive mixed model, while ``Non-normal'' denotes the
  distribution-free mixed-effects additive transformation
  model.}\label{fig:sim1-bias}
\end{figure}

According to Figure~\ref{fig:sim1-bias}, both the correctly specified
additive model and the flexible transformation model result in unbiased
estimates of the $\beta$ parameter, with no visible additional variability in
the case of the more general, distribution-free approach. The coverage rates
of the 95\%~Wald confidence intervals from the two models are very close to
their nominal levels.
%
<<>>=
cvi <- get_res(sims1, what = function(x) {
  ci <- x$beta + x$beta_se * qnorm(0.975) * c(-1, 1)
  (ci[1] <= b) && (b < ci[2])
})
(cvr <- colMeans(cvi, na.rm = TRUE)) ## Coverage rate
sqrt(cvr * (1 - cvr) / nrow(cvi))    ## Monte Carlo SE
@

The Monte Carlo distributions of the out-of-sample log-likelihood are
presented in Figure~\ref{fig:sim1-pred}. For each simulated estimation
dataset, we generated an independent evaluation dataset of the same size and
structure, and calculated the log-likelihood of this new dataset given the
parameters fitted to the estimation sample. The predictive performances of
the two models are very close to each other, with just a slight reduction in
the log-likelihood values for the more complex transformation model.
\begin{figure}
\centering
<<>>=
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
boxplot(get_res(sims1, "lloos"), ylab = "Out-of-sample log-likelihood",
        names = c("Normal", "Non-normal"), boxwex = 0.5)
grid(nx = NA, ny = NULL)
@
\caption{Predictive performance of the normal additive mixed model and the
  mixed-effects additive transformation model evaluated by the out-of-sample
  log-likelihood of an independently generated
  dataset.}\label{fig:sim1-pred}
\end{figure}

To visually assess how well the mixed-effects additive transformation model
approximates the conditionally normal response distribution, we plot the
fitted baseline transformation functions ($\widehat h(y)$) and compare them
to the true function $h(y)=y$ (Figure~\ref{fig:sim1-baseline}).

\begin{figure}
\centering
<<>>=
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
th <- get_res(sims1, "theta", "BoxCoxME", simplify = FALSE)
th <- do.call("cbind", th)
## NOTE: to compare with the true transformation function (identity) we need
## to adjust the fitted baseline transformation with the expectation of the
## smooth term
ic <- get_res(sims1, "intercept", "BoxCoxME", simplify = FALSE)
ic <- sapply(ic, `[`, 1)
dat <- gen_smpl(rep(4, 100), beta = b, sfun = sf, sigma = sig, hinv = hi1,
                link = lnk, seed = 1, two_sets = FALSE)
bcm <- BoxCoxME(y ~ x1 + s(x2) + (1 | gr), data = dat, nofit = TRUE)
nd <- dat[rep(1, 100), ]
nd[[variable.names(bcm, "response")]] <- seq(-1, 2, length.out = 100)
mm <- model.matrix(bcm$model$ctm$bases$response, data = nd)
tr <- mm %*% th + matrix(rep(ic, each = 100), nrow = 100)
matplot(nd$y, tr, type = "l", col = grey(0.1, 0.1), lty = 1, ylab = "h(y)",
        xlab = "y", panel.first = grid())
abline(0, 1, lwd = 2, col = 2)
@
\caption{Fitted baseline transformation functions from the mixed-effects
  additive transformation model under the normal data-generating mechanism.
  The red line shows the true function.}\label{fig:sim1-baseline}
\end{figure}

\subsection{Non-normal data-generating process}

In many applied regression problems, the assumption that the outcome is
normally distributed, conditionally on the predictors, is violated. Normal
additive mixed models are still often used in these cases, mainly because the
normal linear model can be remarkably robust to some violations of its
assumptions \citep{Schielzeth_etal_2020}. In the next simulation experiment,
we compare the normal additive mixed model and the non-normal mixed-effects
transformation model in terms of inference for the parametric fixed term and
out-of-sample predictive performance under a non-normal, skewed
data-generating process. As an additional goal, we also investigate the
performance of the transformation approach when the outcome variable is
interval-censored, i.e., none of the values are observed exactly, only up to
an interval.

For each iteration of the Monte Carlo simulations, we generate a dataset with
exactly observed outcomes (from the conditionally non-normal data-generating
process) and fit the normal additive model and the non-normal transformation
model to it. We then create the censored version of the same variable by
binning its values into intervals (of length two in our simulations) and
define the corresponding \code{Surv} object.
%
<<>>=
dat <- gen_smpl(rep(4, 100), beta = b, sfun = sf, sigma = sig, hinv = hi2,
                link = lnk, seed = 1, two_sets = FALSE)
head(dat$y)                           ## exact values
head(make_intcens(dat$y, length = 2)) ## interval-censored version
@
%
In the next step, we refit the transformation model using the
interval-censored data, and compare its results to the additive mixed model
and the transformation model with exact responses. To ensure the
comparability of the out-of-sample predictions, we transform the response to
its interval-censored version only in the estimation sample, not in the
evaluation sample.

<<>>=
nd <- data.frame(x2 = seq(0, 1, length.out = 100))
if (!file.exists("sim2.rda")) {
  seeds <- 1001:1500
  system.time({
    sims2 <- parallel::mclapply(seeds, function(s) {
      smpl <- gen_smpl(rep(4, 100), beta = b, sfun = sf, sigma = sig,
                       hinv = hi2, link = lnk, seed = s, two_sets = TRUE)
      m1 <- LmME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m1$opt$convergence == 0) {
        lmm <- list(ll = logLik(m1),
                    lloos = logLik(m1, newdata = smpl$test,
                                   type = "fix_smooth"),
                    beta = coef(m1), sigma = sqrt(varcov(m1)$gr[1,1]),
                    beta_se = sqrt(vcov(m1, parm = "x1")[1,1]))
      } else {
        lmm <- NA
      }
      m2 <- BoxCoxME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m2$opt$convergence == 0) {
        sm <- smooth_terms(m2, newdata = nd)[[1]][, 2] + mean(sf(smpl$est$x2))
        bcm <- list(ll = logLik(m2),
                    lloos = logLik(m2, newdata = smpl$test,
                                   type = "fix_smooth"),
                    beta = coef(m2), sigma = sqrt(varcov(m2)$gr[1,1]),
                    beta_se = sqrt(vcov(m2, parm = "x1")[1,1]),
                    smooth = sm)
      } else {
        bcm <- NA
      }
      smpl$est$y <- make_intcens(smpl$est$y, length = 2)
      m3 <- BoxCoxME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m3$opt$convergence == 0) {
        sm <- smooth_terms(m3, newdata = nd)[[1]][, 2] + mean(sf(smpl$est$x2))
        icm <- list(ll = logLik(m3),
                    lloos = logLik(m3, newdata = smpl$test,
                                   type = "fix_smooth"),
                    beta = coef(m3), sigma = sqrt(varcov(m3)$gr[1,1]),
                    beta_se = sqrt(vcov(m3, parm = "x1")[1,1]),
                    smooth = sm)
      } else {
        icm <- NA
      }
      list(LmME = lmm, BoxCoxME = bcm, BoxCoxME_ic = icm)
    }, mc.cores = 8)
  })
  save(sims2, file = "sim2.rda")
} else {
  load("sim2.rda")
}
## -- non-convergence
rowSums(sapply(sims2, function(x)
  sapply(x, function(xx) c(length(xx) == 1 && is.na(xx)))))
@

Figure~\ref{fig:sim2-bias} shows the bias of the estimates of the coefficient
$\beta$ for the two models with exact responses and for the transformation
model under interval-censored data. In the case of the (misspecified) normal
additive mixed model, there is a sign of a slight downward bias, while the
transformation model gives unbiased estimates of the parametric fixed-effects
coefficient, irrespective of whether it uses the exact observations or their
interval-censored version. The coverages of the 95\% Wald confidence
intervals are close to their nominal levels in all cases.
%
<<>>=
cvi <- get_res(sims2, what = function(x) {
  ci <- x$beta + x$beta_se * qnorm(0.975) * c(-1, 1)
  (ci[1] <= b) && (b < ci[2])
})
(cvr <- colMeans(cvi, na.rm = TRUE)) ## Coverage rate
sqrt(cvr * (1 - cvr) / nrow(cvi))    ## Monte Carlo SE
@

\begin{figure}
\centering
<<>>=
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
boxplot(pnorm(get_res(sims2, "beta") / sqrt(2)) - 0.7, ylab = "PI",
        names = c("Normal", "Non-normal", "Non-normal (IC)"), boxwex = 0.5)
grid(nx = NA, ny = NULL)
abline(h = 0, lwd = 2, lty = 2, col = 2)
@
\caption{Bias of the coefficient of the parametric fixed term in the case of
  the conditionally non-normal data-generating process. The results are
  presented on the probabilistic index scale.
  ``Normal'' corresponds to the results from the normal additive mixed model,
  ``Non-normal'' denotes the distribution-free mixed-effects additive
  transformation model and ``Non-normal (IC)'' shows the results of the
  transformation model under interval-censoring.}\label{fig:sim2-bias}
\end{figure}

The differences between the normal and the distribution-free approaches are
more marked in their out-of-sample predictive performances. As
Figure~\ref{fig:sim2-pred} shows, the normal additive mixed model performs
poorly compared to the transformation model, even in the interval-censored
case, in which the outcomes in the estimation sample are not observed
exactly.

\begin{figure}
\centering
<<>>=
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
boxplot(get_res(sims2, "lloos"), ylab = "Out-of-sample log-likelihood",
        names = c("Normal", "Non-normal", "Non-normal (IC)"), boxwex = 0.5)
grid(nx = NA, ny = NULL)
@
\caption{Predictive performance of the normal additive mixed model and the
  mixed-effects additive transformation model with exact (``Non-normal'') and
  interval-censored (``Non-normal (IC)'') observations evaluated by the
  out-of-sample log-likelihood of an independently generated
  dataset.}\label{fig:sim2-pred}
\end{figure}

Figure~\ref{fig:sim2-smooth} presents the fitted smooth non-linear terms from
the non-normal transformation model in the exactly observed and
interval-censored scenarios. As the results show, the model is able to
recover the non-linear term even when the information on the outcome is
limited to intervals.

\begin{figure}
\centering
<<>>=
par(mfrow = c(1, 2), las = 1, mar = c(4, 5, 1, 1), cex = 0.8)
sms <- get_res(sims2, what = "smooth", which = c("BoxCoxME", "BoxCoxME_ic"),
               simplify = FALSE)
sm1 <- sapply(sms, function(x) x[, 1])
matplot(nd$x2, sm1, type = "l", col = grey(0.1, 0.1), lty = 1,
        ylab = expression(hat(f) * "(x)"), xlab = "x", panel.first = grid())
lines(nd$x2, sf(nd$x2), col = 2, lwd = 2)
sm2 <- sapply(sms, function(x) x[, 2])
matplot(nd$x2, sm2, type = "l", col = grey(0.1, 0.1), lty = 1,
        ylab = expression(hat(f) * "(x)"), xlab = "x", panel.first = grid())
lines(nd$x2, sf(nd$x2), col = 2, lwd = 2)
@
\caption{Fitted non-linear effects from the mixed-effects additive
  transformation model with exact observations (\emph{left panel}) and
  interval-censored observations (\emph{right panel}) under the non-normal
  data-generating mechanism. The red curves show the true
  function.}\label{fig:sim2-smooth}
\end{figure}

In summary, the results of the simulation experiments presented above suggest
that, at least in the scenarios considered, the cost of using the
distribution-free transformation model framework in mixed-effects additive
modeling is low, while it can improve inference and predictive performance
when the distributional assumptions of the available alternatives are not
met. Moreover, our results confirm that the model provides reliable estimates
when the outcome variable is censored.
\clearpage

<<>>=
if (file.exists("packages.bib")) file.remove("packages.bib")
pkgversion <- function(pkg) {
  pkgbib(pkg)
  packageDescription(pkg)$Version
}
pkgbib <- function(pkg) {
  x <- citation(package = pkg, auto = TRUE)[[1]]
  b <- toBibtex(x)
  b[1] <- paste("@Manual{pkg:", pkg, ",", sep = "")
  if (is.na(b["url"])) {
    b[length(b)] <- paste(" URL = {http://CRAN.R-project.org/package=",
                          pkg, "}", sep = "")
    b <- c(b, "}")
  }
  cat(b, sep = "\n", file = "packages.bib", append = TRUE)
}
pkg <- function(pkg) {
  vrs <- try(pkgversion(pkg))
  if (inherits(vrs, "try-error")) return(NA)
  paste("\\\\pkg{", pkg, "} \\\\citep[version~", vrs, ",][]{pkg:", pkg, "}",
        sep = "")
}
pkgs <- c("gamm4", "tramME", "glmmTMB")
out <- sapply(pkgs, pkg)
x <- readLines("packages.bib")
for (p in pkgs)
  x <- gsub(paste("\\{", p, ":", sep = ""),
            paste("\\{\\\\pkg{", p, "}:", sep = ""), x)
cat(x, sep = "\n", file = "packages.bib", append = FALSE)
@

\bibliographystyle{plainnat}
\bibliography{ref,packages}

\clearpage

<<>>=
sessionInfo()
@

\end{document}