\documentclass[nojss]{jss}
%\documentclass[a4paper]{article}
\usepackage{graphicx}
\usepackage{color}
\usepackage{float}
\usepackage{rotating}
\usepackage{natbib}
\usepackage{amssymb,amsmath,bm}
\usepackage[figurewithin=section]{caption}

\usepackage{hyperref}

\newcommand{\ee}[1]{\ensuremath{\mathbb{E}\left(#1\right)}}
\newcommand{\p}[1]{\ensuremath{Pr\displaystyle\left(#1\right)}}
\newcommand{\ds}{\ensuremath{\displaystyle}}
\newcommand{\gem}{\textsc{gem }}
\newcommand{\cub}{\textsc{cub}}
\newcommand{\cush}{\textsc{cush }}
\newcommand{\ccup}{\textsc{cup }}
\newcommand{\ihg}{\textsc{ihg }}
\newcommand{\hcub}{\textsc{hcub}}
\newcommand{\vcub}{\textsc{vcub\,}}
\newcommand{\cube}{\textsc{cube}}
\newcommand{\gecub}{\textsc{g}\textit{e}\textsc{cub\,}}




\author{Maria Iannario\\\small University of Naples Federico II \And
        Domenico Piccolo\\\small University of Naples Federico II \And Rosaria Simone\\\small University of Naples Federico II}
\title{The \textsf{R} package CUB: \\a class of mixture models for ordinal rating data}

%% for pretty printing and a nice hypersummary also set:
\Plainauthor{Maria Iannario, Domenico Piccolo, Rosaria Simone} %% comma-separated
\Plaintitle{The \textsf{R}  package CUB: a mixture model for ordinal data} %% without formatting
\Shorttitle{The \textsf{R} package \textsc{cub\,}} %% a short title (if necessary)

%% an abstract and keywords
\Abstract{The \proglang{R} package \pkg{CUB} has been developed for the analysis of ordinal rating data with a class of finite mixture distributions whose main feature is the probabilistic specification of the decision-making process as the combination of uncertainty and feeling components. Extensions include models for overdispersion, inflated categories and large heterogeneity occurrences. The parameter estimation procedure is based on maximum likelihood methods, with the implementation of the Expectation-Maximization (EM) algorithm.  Several features of package \pkg{CUB}, including simulation routines, are presented on two data sets loaded within the package.
}
\Keywords{Rating data, \cub\, models, \cube\, models, \textit{Shelter effect}, \cush models, \ihg models}
\Plainkeywords{Rating data, \cub\, models, \cube\, models, \textit{Shelter effect}, \cush models, \ihg models} %% without formatting
%% at least one keyword must be supplied

%% publication information
%% NOTE: Typically, this can be left commented and will be filled out by the technical editor
%% \Volume{50}
%% \Issue{9}
%% \Month{June}
%% \Year{2012}
%% \Submitdate{2012-06-04}
%% \Acceptdate{2012-06-04}

%% The address of (at least) one author should be given
%% in the following format:
\Address{
  Maria Iannario, Domenico Piccolo, Rosaria Simone\\
  Department of Political Sciences\\
  University of Naples Federico II\\
  80138 Naples, Italy\\
  Telephone: +39/081-2537465\\
  Fax: +39/081-2537466\\
  E-mail: \email{maria.iannario@unina.it,domenico.piccolo@unina.it,rosaria.simone@unina.it}\\
  URL: \url{http://www.docenti.unina.it/MARIA.IANNARIO}\\
  URL: \url{http://www.docenti.unina.it/DOMENICO.PICCOLO}\\
  URL: \url{https://www.docenti.unina.it/ROSARIA.SIMONE}
}


<<include=FALSE,eval=TRUE,echo=FALSE>>=
library(knitr)
library(digest)
opts_chunk$set(
engine='R',dev='pdf',fig.width=7,fig.height=5,strip.white=TRUE,tidy=FALSE
)
@

% \VignetteIndexEntry{Analysis of rating data with CUB models} \\
% \VignetteEngine{knitr::knitr}
% \VignetteDepends{CUB} \\
% \VignettePackage{CUB} \\

\begin{document}

<<include=FALSE>>=
library(knitr)
opts_chunk$set(
concordance=TRUE
)
@

%\SweaveOpts{concordance=TRUE}
%\SweaveOpts{concordance=TRUE}


\maketitle

<<eval=TRUE,echo=FALSE,comment=NA>>=
options(warn=-1)
suppressMessages(library(CUB))
library(knitr)
options(warn=-1)
@


\section[Introduction]{Introduction}
Ordered categorical data are commonly used in scientific fields
where respondents express a graduated evaluation on a specific item. Examples include ratings of preference in consumer studies, degree of pain measured on a visual analogue scale, numerical or verbal rating scale, sensory evaluation on food perception and appreciation,  self evaluation of well-being, job satisfaction, economic perceived conditions and so on. Cumulative link models are a suitable
 class of models for such data since the ordered nature of observation is exploited
and the flexible regression framework allows good fitting performances.
These models with a logit link are widely known as the proportional odds model due to \cite{McCullagh1980}. The cumulative models are also
known as ordinal regression models although the latter term is used to indicate other
 models for ordinal responses such as continuation ratio models and adjacent category models (see e.g. \cite{Agr10}; \cite{Tutz2012}). Other alternatives with the indication of the logit and probit link functions are ordered logit models and ordered probit models
\citep{GreeneHensher2010}. 

Several R packages or functions for implementing these statistical models for ordered factor responses have been introduced, mainly addressing methods based on latent variables linked to observed categorical responses via threshold models. Among others, we quote \pkg{polr()} in \pkg{MASS} package \citep{Ripley2002}, fitting logistic or probit regression models, for instance. In \pkg{rms} the function \pkg{lrm()} is available \citep{Harrell09} for binary and proportional odds logistic regression; in \pkg{VGAM} the function \pkg{vgml()} \citep{Yee2010} to fit vector generalized linear models, whereas in \pkg{Zelig} it is possible to exploit \pkg{zelig()} to estimate several statistical models; in \pkg{nnet} the function \pkg{multinom()} allows to fit multinomial log-linear models \citep{Ripley2016} and in \pkg{Ordinal} the function \pkg{clm()} \citep{Christensen2015} fits general cumulative link models. For a Bayesian perspective, Markov Chain Monte Carlo for ordered probit regression are implemented in the package \pkg{MCMCpack} via calls to \pkg{MCMCoprobit()} \citep{Martin2009}.

An alternative approach is the use of models anchored to a structured discrete probability distribution \citep{DelPicc2005} with a close relationship to the data generating process. More specifically, a two-component mixture is designed to shape ordinal observations as a weighted combination of a preference component and an uncertainty component, each controlled by a given distribution.

The statistical appeal of these models relies on several features, among which: a sharp relationship with the data generating process producing ordinal data: parsimony in the parameter estimation; effective graphical devices useful for the interpretation, and direct inclusion of respondents' characteristics to specify how they relate with the two components.


This class of models stems from the baseline \emph{C}ombination of a discrete \emph{U}niform and a shifted \emph{B}inomial random variable -\cub\, models- firstly introduced by \cite{Piccolo2003} and \cite{DelPicc2005}, where two latent components, called \textit{feeling} and \textit{uncertainty}, are jointly considered to specify the mixture model. General references for the statistical background include \cite{Iannario2012a, Ian12b, Iannario2015}; \cite{Ianpicc10a}. A detailed list of alternative models is presented in \cite{IannarioPiccolo2016a}; see also \cite{Tutz2016}. Robustness issues for \cub\, models are discussed in \cite{Iannarioetal16a}.

Further methodological studies have been published about \cub\, models, among others \cite{Corduas2009}; \cite{Gamba14}; \cite{Grilli14} for a latent class version; \cite{Ian12c, Ian12d, Iannario2014};
\cite{ManiZucc2014a,Iannarioetal16} for including \textit{don't know} options also with covariates or for assuming non-linear spacing between adjacent categories:  \cite{ManiZucc2014b}. In addition, applications have been performed in many disciplines, ranging from labour economics \citep{Gamba13} to demography \citep{Ianpicc10b}, happiness economics \citep{CapecchiPiccolo2014}, linguistics \citep{Balirano2008},
marketing \citep{Iannarioetal12}, medicine \citep{Delia2008}, sensory science \citep{Picd08, CapEndrizzi}. Proposals for the generalization of the \cub\, model in a bivariate perspective are discussed by \cite{Corduas2015}. Moreover, \cite{AndreisFerrari2013}, \cite{ColombiGiordano2015} introduced a general multivariate setting.


This article deals with the package \pkg{CUB} written in the R environment and available from CRAN at \url{http://CRAN.R-project.org}. It is presented in its first consistent update, but the implemented program has been long experienced among scholars and researchers working with \textsc{cub\,} models and their extensions.


The paper is organized as follows. Section 2 briefly reviews \textit{GE}neralized \textit{M}ixture Models with uncertainty (\textsc{gem}) for
ordinal data stemming from \cub\, models. In Section 3 the main functions and features
of package \pkg{CUB} are illustrated by means of case studies based on two data-sets loaded within the package. Finally, extra features of \gem models and future extensions are briefly discussed in Section 4.

Ntice that package \pkg{CUB}, although performs statistical analysis for the whole class of \gem models, is named after the original proposal.


\section{GEM models specification}\label{sec:cub}

The basic idea of \gem framework is to model the ordinal responses $(R_1,\dots,R_n)$, where $R_i$ is given by the $i$-th subject, $i=1,\dots,n$, with respect to a given item $I$, on the basis of two main components related to the decisional process. The first component ($C_{1}$) related to the feeling (attraction, satisfaction, awareness towards the item): in the baseline \cub\, model, it is modelled by means of the shifted Binomial random variable. The second one ($C_{2}$) concerns the uncertainty (indecision, blurriness, fuzziness) and it is modelled by means of a discrete Uniform distribution over the support  $\{1,\ldots,m\}$. Here and in the sequel, $m$ will denote the number of ordinal categories, and it is assumed that $m > 3$ for identifiability purposes \citep{Ian10}. All available information on subjects' characteristics (covariates) are collected in a matrix $\bm T$, and $\bm Y$ and $\bm W$ are submatrices of $\bm T$ reporting values of respondents' covariates useful to explain uncertainty and feeling components, respectively.

Let $\mathbf{\bm\theta}=(\bm\beta^{'}, \bm\gamma{'})'$ be the parameter vector characterizing the distribution of $(R_1,\dots,R_n)$, with $\bm\beta^{'}, \bm\gamma{'}$ denoting the parameter vector for the uncertainty and feeling components, respectively. Then, the mixture regression model has the following form:
\begin{equation}\label{cub}
\p{R_i=r\mid \,\bm y_i,\, \bm w_i,\,\bm \theta}=\pi_i\, \p{C_{1i}=r \mid \bm w_i}\,+\,(1-\pi_i)\,\p{C_{2i}=r},
\end{equation}
for $i=1,\dots,n$ and $j=1,\dots,m$, where $\pi_i=\pi(\bm y_i, \bm \beta)\, \in (0,\,1]$ are introduced to weight the two components and $\bm w_i \in \bm W$,\, $\bm y_i \in \bm Y$ include the selected covariates for the $i$-th subject. 
They are related to the parameters by means of a logit link, a not compulsory but preferred choice for easiness of interpretation and robustness properties \citep{Iannarioetal16b}:
\begin{equation}
\label{paicsilogis}
logit\left(1-\pi_i\right)=-\bm \beta^{'}\,\bm y_i ;\qquad logit\left(1-\xi_i\right)= -\bm \gamma^{'}\,\bm w_i.
\end{equation}
%$$Pr(R=r)=\pi Pr(Y=j)\,+\,(1-\pi)Pr (V=j)\, \, \,=j=1,2,\ldots,m.$$


The implementation of \gem models relies on the \pkg{Formula} interface \citep{Formula}, allowing to specify different regressor matrices for the different parameters.  The following call will estimate a \gem model for given \code{ordinal} observations:
<<echo=TRUE,eval=FALSE>>=
GEM(Formula, family, ...)
@
where \code{Formula} is an object of class \pkg{Formula}, and \code{family} indicates which sub-class of models has to be fitted. Optional inputs for the specification of the models can be passed via the \code{...} argument, which serves also to modify some technical parameters needed for the estimation procedure. For instance, it is possible to change the maximum number of iterations allowed for running the optimization algorithm (\code{maxiter=500} by default) and the error tolerance for final estimates (\code{toler=1e-6} by default). The argument \code{data} is optional. The number of categories \code{m} is, in general, internally retrieved but it is advisable to pass  it as an argument to the call if some category has zero frequency.


As mentioned, the benchmark model is the \cub\, mixture model, where the preference part is modelled according to a shifted Binomial random variable: 
 $$b_r\left(\xi_i\right)={m-1 \choose r-1} \xi_i^{m-r} \left(1-\xi_i\right)^{r-1}, \qquad r=1,2,\dots,m$$
with feeling parameters given by $\xi_i$, $i=1,\dots,n$, as in \eqref{paicsilogis}.


The probability distribution chosen to model uncertainty is the discrete Uniform, thus it does not imply estimable parameters ensuring model parsimony. An increase of $\pi_i$ implies a reduced impact of the uncertainty component; thus, the quantity $(1-\pi_i)$ is a (normalized) measure of the uncertainty implied by the  model. Alternative subjective choices of distributions for this latent component are proposed and motivated in \cite{Gott2016}.

In the simplest version, the \gem model works on the ratings given to a single
item and provides a global measure of feeling and uncertainty for the whole population under study by letting $\pi_i=\pi$ and $\xi_i=\xi$, that is, by considering no knowledge about subjects' characteristics. The corresponding probability distribution can be computed via \code{probcub00(m, pai, csi)}.  


Other extensions which are implemented within package \pkg{CUB} concern the ability to take into account the presence of inflated categories and the overdispersion effect.


\subsection{Inflated CUB models}

Inflated \cub\, models imply the presence of a \emph{shelter} effect \citep{Iannario2012a} located at category $s \in \{1,\dots,m\}$. 

\cub\, models are thus extended with the introduction of a degenerate random variable $D_r^{(s)}=I(R=s)$, whose probability mass is concentrated at $r = s$, as a third component in (1). The function returns the estimation of parameters related both to the specification:
\begin{equation}\label{shelter1}
\p{R=r \mid \bm
\theta}=\pi_1\,b_r(\xi)+\pi_2\,\,\frac{1}{m}+(1-\pi_1-\pi_2)\,D_r^{(s)}\,,
\end{equation}
and to the alternative formulation:
\begin{equation}\label{shelter2}
\p{R=r \mid \bm \theta}=\delta\,\biggl[\,D_{r}^{(s)}\,\biggr]\,+\,(1-\delta)\,\biggl[\,\pi^*\,b_{r}(\xi)+(1-\pi^*)\,\,\frac{1}{m}\,\biggr], \,\,\, r=1,2,\dots,m.
\end{equation}
They are equivalent specifications since:
$$ 
\begin{cases}
\pi_1   = \pi^* (1-\delta) \\
\pi_2 = (1-\pi^*) (1-\delta)
\end{cases}
\Longleftrightarrow \qquad
\begin{cases}
\pi^{\star} = \dfrac{\pi_1}{\pi_1+\pi_2}\\
\delta = 1- \pi_1 - \pi_2.
\end{cases}
$$

The corresponding probability distributions can be computed via \code{probcubshe1(m, pai1, pai2, csi, shelter)} for \eqref{shelter1} and \code{probcubshe2(m, pai, csi, delta, shelter)} for \eqref{shelter2}, respectively. 

For the parameterization \eqref{shelter2}, the estimable parameter vector is $\bm \theta= (\pi^*,\,\xi,\,\delta)'$; thus it is immediate to quantify the \emph{shelter effect} by means of the parameter $\delta$. Moreover, the modification of the uncertainty component so induced is evaluated by comparing the $\pi$ parameter in model \eqref{cub} with  $\pi_1$ in model \eqref{shelter1}. A further specification of the \emph{shelter effect} adheres to the so-called \textit{satisficing behaviour}, impletemented  via \code{probcubshe3(m, lambda, eta, csi, shelter)}. Covariates in the model can be specified for all parameters by considering generalized \cub\, models (\gecub) \citep{IannarioPiccolo2016b}.

The examination of real phenomena where responses are affected by an uncertainty level close to the maximum and with inflation in just one category suggested the introduction of \cush models (\emph{C}ombination of a \emph{U}niform distribution and a \emph{SH}elter component) \citep{CapEndrizzi,CapecchiPiccolo2016,CapecchiIannario2016}, whose distribution is especially useful in dealing with sample surveys affected by large heterogeneity (several examples are observed in the analysis of activities for leisure times, see the discussion below). In other words, \cush models are nested into \gecub ones when the shifted Binomial component tends to have zero weight \citep{CapecchiIannario2016}: indeed, the distribution without covariates may be obtained by \eqref{shelter2} when $\pi^{\star}=0$. The routine \code{probcush(m, delta, shelter)} computes the corresponding probability distribution:
\begin{equation}\label{probcush}
\p{R=r \mid \bm \theta}=\delta\,\,D_{r}^{(s)}\,+\,(1-\delta)\,\frac{1}{m}, \,\,\,\quad r=1,2,\dots,m,
\end{equation}
where $\bm \theta = \delta$, the weight of the \textit{shelter effect}. If $\bm X$ is a submatrix of $\bm T$ specifying the observations on selected covariates for explaining the \emph{shelter effect}, then the link: $logit(\delta_i)= \bm \omega^{'} \bm x_i, i=1,\dots,n\,,$ will specify parameters for each respondent profile (here, $\bm \omega^{'}$ includes an intercept term).

\subsection{Overdispersion}

\cub\, models are not suitable to account for a possible extra-variability among responses, since location and variability of a Binomial random variable are jointly and severely constrained. Thus, a parsimonious extension has been proposed by introducing \cube\, models \citep{Iannario2014, Iannario2015}. Here,  the shifted Binomial distribution for $\p{C_{1i}=r}$ is replaced by a shifted Beta-Binomial distribution $g_r(\xi,\phi)$ over the support $\{1,\ldots,m\}$: 
\begin{equation}\label{probbb}
g_r(\xi,\phi)=   {m-1 \choose r-1} \dfrac{\prod\limits_{k=1}^{r}\big(1-\xi +\phi\,(k-1)\big) \, \prod\limits_{k=1}^{m-r+1}\big(\xi + \phi(r-1)\big)}{(1-\xi + \phi\,(r-1))(\xi + \phi\,(m-r)) \prod\limits_{k=1}^{m-1} (1+\phi\,(k-1))},        \,\,\, r=1,2,\dots,m,
\end{equation}
(see \code{betar()}).
Thus, the probability distribution of a \cube\, model -computed by \code{probcube(m, pai, csi, phi)}- is given by:
\begin{equation}\label{probbb}
\p{R=r \mid \bm \theta} =   \pi\,g_r(\xi,\phi)  \,+\, (1-\pi)\,\,\frac{1}{m}  \,\,\,\quad r=1,2,\dots,m.
\end{equation}

The additional parameter $\phi$ which characterizes the distribution is a direct measure of overdispersion \citep{Iannario2016}. When $\phi = 0$, a \cube\, model reduces to a \cub\, one; thus, the latter is nested into the first one: this implies that the improvement of the fit of \cube\, over \cub\, models for given data can be easily established by likelihood ratio tests.

Covariates may be introduced for explaining all the three parameters or only
the feeling component of the \cube\, mixture \citep{Piccolo2015}. The logarithmic link is customarily used for the overdispersion parameter: $\log\left(\phi_i\right)= \bm \alpha^{'}\bm z_{i} \,$ where $\bm z_{i} \in \bm Z$, and $\bm Z$ is a submatrix of $\bm T$ ($\bm \alpha^{'}$ includes an intercept term). Feeling and uncertainty components are related to subjects' characteristics as in \eqref{paicsilogis}.

It has been remarked \citep{Ian12b} that \cube\, models with:
\begin{equation}
\label{ihg_cube}
\pi = 1; \qquad \xi = \dfrac{(m-1)\theta}{1+m(\theta - 2)}; \qquad \phi = \dfrac{1-\theta}{1+\theta(m-2)},
\end{equation}
correspond to \ihg models \citep{Delia2003} with $\theta \in [0,1]$, where \ihg stands for Inverse Hypergeometric distribution. These models arise from unimodal extreme-feeling distributions with no uncertainty. In this case, for the $i$-th respondent, a parameter $\theta_i$ characterizes the model and gives the probability of observing a rating corresponding to the first/last category: therefore, $\theta_i$ is a direct measure of preference, attraction, pleasantness towards the investigated item. The routine \code{probihg(m, theta)} computes the \ihg probability distribution over $m$ categories:
\begin{equation*}
\p{R=r \mid \bm \gamma}=\theta (1-\theta)^{r-1}\,\frac{m-1}{m}\,\prod_{l=1}^{r}\,\frac{m-l+1}{m-l+\theta(l-1)}\,\;\quad r=1,\dots,m.
\end{equation*}
 Also for this sub-class of models, it is possible to study response patterns among respondents by including subjects' covariates to explain the preference parameter $\theta_i$, $i=1,\dots,n$, via a logit link:  $logit(\theta_i)=\bm\nu^{'} \bm u_i$, where $\bm u_{i} \in \bm U$ and $\bm U$ is a submatrix of $\bm T$ ($\bm \nu^{'}$ includes an intercept term).

After this self-contained overview of the statistical methodology grounded on \gem models, next section provides guidelines for the main calls to fit \gem models on given data  when using package \pkg{CUB}.


\subsection{Main calls}

As already specified, the baseline \gem model is the \cub\, distribution. To fit a \cub\, model to given ordinal data  arranged in a vector \code{ordinal}, the most general call to run is:

<<echo=TRUE,eval=FALSE>>=
GEM(Formula(ordinal~Y|W|X), family="cub", ...)
@
where \code{Y, W, X} indicate the model matrices of explanatory variables for uncertainty, feeling and possible \emph{shelter} components, respectively. If, for all components, no covariate is  included in the model, then \code{Y=0}, \code{W=0}, \code{X=0} need to be specified. For instance

<<echo=TRUE,eval=FALSE>>=
GEM(Formula(ordinal~0|0|0), family="cub")
@
fits a \cub\, model with no covariates for given \code{ordinal} data. Then,

<<echo=TRUE,eval=FALSE>>=
GEM(Formula(ordinal~Y|0|0), family="cub")
@

considers only the covariate matrix \code{Y} to explain the uncertainty component. Similarly,

<<echo=TRUE,eval=FALSE>>=
GEM(Formula(ordinal~0|W|0), family="cub")
@
includes the covariate matrix \code{W} to explain only the feeling component, whereas:
<<echo=TRUE,eval=FALSE>>=
GEM(Formula(ordinal~Y|W|0), family="cub")
@
specifies a \cub\, model with covariates for both components. 

To test for a \emph{shelter effect}, it is possible to implement either a \cub\, model  without covariates  by specifying the value of the \emph{shelter} $s$ in the main call to \code{GEM()}:
<<eval=FALSE,echo=TRUE>>=
GEM(Formula(ordinal~0|0|0), family="cub", shelter=s)
@

or to implement the more comprehensive specification concerning \gecub models:
<<echo=TRUE,eval=FALSE>>=
GEM(Formula(ordinal~Y|W|X), family="cub", shelter=s)
@

\cush models can be fitted by the main function \code{GEM()}, optionally with the specification of covariates, by specifying \code{family} argument to \code{"cush"}:
<<eval=FALSE,echo=TRUE>>=
GEM(Formula(ordinal~0), family="cush", shelter = s)     # without covariates
GEM(Formula(ordinal~X), family="cush", shelter = s)     # with covariates
@


If \code{Y}, \code{W} and \code{Z} are matrices of values of selected covariates for explaining uncertainty, feeling and overdispersion, respectively, \cube\, models can be estimated via:
<<eval=FALSE,echo=TRUE>>=
GEM(Formula(ordinal~0|0|0), family="cube")    # without covariates
GEM(Formula(ordinal~0|W|0), family="cube")    # with covariates for feeling
GEM(Formula(ordinal~Y|W|Z), family="cube")    # with covariates for all parameters
@

For \cube\, model with no covariates, the default option \code{expinform = FALSE} implies that the variance-covariance matrix of estimates is based on the observed information matrix. If \code{expinform = TRUE} is added as an extra argument, the procedure considers the expected information matrix instead. Note that the fitting procedure for \cube\, models with covariates for all components might be particularly slow: hence, in the first runs, we tentatively suggest to lower the tolerance and the maximum number of iterations allowed for the optimization algorithm by specifying \code{maxiter} and \code{toler} arguments in the function call (for instance, \code{maxiter=50}, \code{toler=1e-3}). Alternatively, one might estimate a random sub-sample of data and adopt the resulting estimates as preliminary values for a \gem\, fit to the whole data.

Finally, estimation of \ihg models is implemented via:
<<echo=TRUE,eval=FALSE>>=
GEM(Formula(ordinal~0),family="ihg")         # without covariates
GEM(Formula(ordinal~U),family="ihg")         # with covariates
@

Every call to the main \code{GEM} creates an object of the class \texttt{"GEM"} -to which the subclass specified by the \code{family} argument is appended. Next sections provide details on  inferential issues for \gem models, and the corresponding implemented \code{S3} methods.


\section{Implementation and inference in CUB}

Package \pkg{CUB} fits the models presented in Section \ref{sec:cub} using Maximum Likelihood (ML) estimation as performed by classical EM procedures \citep{Mac1997}. For every unspecified detail about \gem models, see \cite{IannarioPiccolo2016a}. Further, details about the EM steps for \cub\, models can be found in \cite{Piccolo2006}, for \cube\, models in \cite{Iannario2014}; \cite{Piccolo2015}, and for \gecub models in \cite{IannarioPiccolo2016b}. For \cush models, the reader is referred to 
\cite{CapecchiPiccolo2016} for a full discussion, whereas for \ihg see \cite{Delia2003}.

\subsection{Method implementation}

For an object of class \code{"GEM"}, storing the information of the fitting procedure of a \gem model to given data, the standard \code{S3} method \code{logLik()} applies to return the maximized log-likelihood at the final ML estimates. 
Thus, in the baseline version of a \gem model, parameters are estimated by maximizing the log-likelihood:
$$\ell(\bm \theta)=\sum_i^n\log\big[\p{R_i=r\mid \,\bm y_i,\, \bm w_i,\,\bm \theta}\big].$$

The optimization procedure is run via the function \code{optim()}, except for \cub\, models without covariates (for which the EM steps are directly implemented:  \cite{DelPicc2005}) and for \cush models without covariates (for which the ML estimate of the \textit{shelter} parameter can be straightforwardly computed, see  \cite{CapecchiPiccolo2016}). Whenever bounds on parameter estimates have to be required, the box-constrained option for \code{optim()} is selected, and it considers a restriction of the admissible range of the parameter space: in this case, \code{method = "L-BFGS-B"} is selected for multidimensional optimization problems (with finite difference approximation selected with the option \code{gr = NULL}), while \code{method = "Brent"} is selected for the unidimensional optimization problem when \ihg models with no covariate are fitted. In all other cases, the default Nelder-Mead method is selected. 

In addition, for given ordinal data and model-specific parameters, log-likelihood values can be computed via the following main calls (the default option is that neither covariate nor \emph{shelter effect} are included):
<<eval=FALSE,echo=TRUE>>=
llCUB  <- loglikCUB(ordinal,m,param,Y=Y,W=W,X=X,shelter=s)
llCUBE <- loglikCUBE(ordinal,m,param,Y=Y,W=W,Z=Z)
llIHG  <- loglikIHG(ordinal,m,param,U=U)
llCUSH <- loglikCUSH(ordinal,m,param,X=X,shelter=s)
@
The vector \code{param} lists the parameters for the specified model, with the uncertainty parameters always specified first, followed by the feeling ones and, last, by the overdispersion or \textit{shelter} parameters when considering \cube\, or \cub\, models with \textit{shelter effect}, respectively. For \ihg and \cush models, \code{param} is a one-dimensional vector when no covariates are considered, otherwise it lists all the regression coefficients for the preference and the \textit{shelter} parameter, respectively. When covariates are included in the model, the first component of the corresponding (sub)vector is the intercept term  (not to be included in the matrix), followed by as many parameters as the number of covariates used to explain the given component.


%In order to control the convergence of the optimization procedure within the EM algorithm at each step is possible to check the convergence code borrowed from calls to \code{optim\$convergence}. 
The variance-covariance matrix of final ML estimates for a fitted model can be retrived by applying the standard \code{S3} method \code{vcov}() to an object of class \code{GEM}. For testing \ihg models with or without covariates, for \cush models with covariates as well as for \cube\, models with covariates for all the three parameters, the option \code{hessian = TRUE} is selected when calling \code{optim()} to return a numerically differentiated Hessian matrix at the final estimates. For the other models, the code implements specific inferential results, as the explicit formulae of the observed and expected variance-covariance matrices \citep{Piccolo2006, Piccolo2015, Iannario2012a, Iannario2014}. 

To compute the (observed) variance-covariance matrix corresponding to given ordinal data and model-specific parameters (for \cub\, and \cube\, models), the following functions are available:
<<echo=TRUE,eval=FALSE>>=
varCUB <- varmatCUB(ordinal, m, param, Y = Ycovar, W = Wcovar, X = Xcovar,
                    shelter = shelter)
@
<<echo=TRUE,eval=FALSE>>=
varCUBE <- varmatCUBE(ordinal, m, param, Y = Ycovar, W = Wcovar, Z = Zcovar, 
  			expinform = FALSE)
@
No covariate is considered neither for \code{varmatCUB()} nor for \code{varmatCUBE()} by default. In addition, default options for \code{varmatCUB} do not consider \textit{shelter effect}. For \code{varmatCUBE()}, the default option \code{expinform = FALSE} provides the variance-covariance matrix based on the observed information matrix. Setting \code{expinform = TRUE},  the variance-covariance matrix will be based on the expected information matrix instead (as advisable when running simulation studies). 


Other standard S3 methods implemented for \code{GEM} objects include:
\begin{itemize}
\item \code{coef()} to retrieve final parameter estimates;
\item \code{logLik()} to obtain the maximized log-likelihood at the final ML estimates;
\item \code{BIC()} to compute the BIC index of the fitted model (in order to compare non-nested models \citep{Schwarz1978});
\item \code{fitted()} to display fitted probability distibution.
\item \code{summary()} to display summary results of the fitting procedures:
\begin{enumerate}
\item ML parameter estimates of the fitted model, asymptotic standard errors and Wald-tests (based on the asymptotic variance-covariance matrix as returned by \code{vcov()});
\item For testing purposes, a list of likelihood-based measures (Log-likelihood, Mean Log-likelihood, and for models without covariates also Log-likelihood of the saturated model, Deviance, Log-likelihood of the Uniform and  shifted Binomial). 
\item  BIC, AIC \citep{Akaike1974} and ICOMP \citep{Bodzogan1990} measures for the fitted model, in order to compare non-nested models.
\end{enumerate}
\end{itemize}


Every call to the main function \code{GEM()} stores the following slots to record some further fitting information:
\begin{itemize}
\item \code{$time}: the time employed for the estimation procedure;
\item \code{$niter}: the number of iterations required to the EM algorithm for convergence within the fixed tolerance. 
\item \code{$formula}: to retrieve the Formula object given as input.
\end{itemize}

Parameter correlation matrix corresponding to an object of class \code{GEM} can be computed via \code{cormat()}, or printed out by specifying \code{correlation=TRUE} when calling \code{summary()}.\\

Table \ref{tab:call} summarizes the main calls to function \code{GEM} to fit the different models. Possible extra arguments to include in the call will be further illustrated and discussed along with the examples.

\begin{sidewaystable}
\centering
\begin{tabular}[|c|c|]{p{10cm}p{11cm}}
\hline Model & Call \\
\hline \hline \multicolumn{2}{c}{\cub \, models}\\
\hline with no covariates & \code{GEM(Formula(ordinal~0|0|0),family="cub")} \\
\hline with no covariates and shelter effect & \code{GEM(Formula(ordinal~0|0|0),family="cub",shelter=shelter)} \\
\hline with covariate matrix \code{Y} for uncertainty & \code{GEM(Formula(ordinal~Y|0|0),family="cub")} \\
\hline with covariate matrix \code{W} for feeling & \code{GEM(Formula(ordinal~0|W|0),family="cub")} \\
\hline with covariate matrices \code{Y} and \code{W} for uncertainty and feeling & \code{GEM(Formula(ordinal~Y|W|0),family="cub")} \\
\hline with covariate matrices \code{Y}, \code{W}, \code{X} for uncertainty, feeling and shelter effect & \code{GEM(Formula(ordinal~Y|W|X),family="cub")} \\
\hline \hline \multicolumn{2}{c}{\cube \, models}\\
\hline with no covariates & \code{GEM(Formula(ordinal~0|0|0),family="cube")} \\
\hline with covariate matrix \code{W} for feeling & \code{GEM(Formula(ordinal~0|W|0),family="cube")} \\
\hline with covariate matrices \code{Y},\code{W} and \code{Z} for uncertainty, feeling  and overdispersion & \code{GEM(Formula(ordinal~Y|W|Z),family="cube")} \\
\hline \hline \multicolumn{2}{c}{\cush \, models}\\
\hline with no covariates & \code{GEM(Formula(ordinal~0),family="cush",shelter=shelter)} \\
\hline with covariate matrix \code{X} for shelter effect & \code{GEM(Formula(ordinal~X),family="cush",shelter=shelter)} \\
\hline \hline \multicolumn{2}{c}{\ihg \, models}\\
\hline with no covariates & \code{GEM(Formula(ordinal~0),family="ihg")} \\
\hline with covariate matrix \code{U} for the preference parameter & \code{GEM(Formula(ordinal~U),family="ihg")} \\
\hline \hline
\end{tabular}
\caption{Summary of possible calls to \code{GEM()}}\label{tab:call}
\end{sidewaystable}


\subsection{Plot facilities}

One of the advantageous features of \gem models is that they allow for easy interpretation of parameters. This is especially convincing for models with no covariates or for (at most) one dichotomous variable, a case for effective graphical devices. 

For an object of class \code{GEM}, the generic function \code{makeplot(object)} will return a plot which compares fitted probabilities and observed relative frequencies. Moreover, for \cub\, models, given the one-to-one correspondence between the probability distribution of $R$ and the parameter vector $\bm \theta=(\pi,\xi)'$, a relevant feature of the approach is the possibility to visualize a \cub\, model as a point in the parameter space, that is the unit square. To this scope, the user is referred to functions  \code{cubvisual}(), \code{multicub}() and \code{cubshevisual}(). Similar features are performed by \code{cubevisual}() and \code{multicube}() for \cube\, models.

Other instances of graphical analysis will be shown along with discussion of empirical evidence in Section \ref{sec:use}.


\section{Package CUB in use}\label{sec:use}
Some basics routines that can help in the comprehension of \cub\, models and their extensions are presented. The following code produces Figure \ref{fig:cubeprob}, in which the plots of \cube\, probability mass functions for varying parameters are shown,  as in \cite{Iannario2015}. Figure \ref{fig:cubeprob} displays also the expectation and the variance of the given \cube\, distributions, computed via \code{expcube()} and \code{varcube()}, respectively. Similar functions for \cub\, models are \code{expcub00()} and \code{varcub00()}. 

<<eval=FALSE,echo=TRUE>>=
###########################################################################
### Selection of 9 CUBE models with csi = 0.3 over 9 ordinal categories  ***
###########################################################################
m<-9; csi<-0.3
########### varying pai and phi parameters 
paival<-seq(0.9,0.1,by=-0.1)
phival<-rep(c(0.05,0.1,0.3),times=3)
model<-cbind(paival,phival); nmodels<-nrow(model)
##########################################
par(mfrow = c(3,3))                   
par(mar = c(2,4,3,1)+0.1)           
### Probability distribution plots for jmod=1,2,...,9
for (jmod in 1:nmodels){
  paij<-model[jmod, 1]
	phij<-model[jmod, 2]
	prob<-probcube(m, paij, csi, phij)
	exp<-expcube(m, paij, csi, phij)
	var<-round(varcube(m, paij, csi, phij), digits = 2)
	plot(1:m, prob, type = "h", lwd = 3, ylim = c(0,0.4), xlab = "",
       ylab = "Pr(R=r)", las = 1)
	text(2.3, 0.38, bquote(pi == .(paij)), cex = 0.7)
  text(5, 0.38, bquote(xi == .(csi)), cex = 0.7)
  text(7.8, 0.38, bquote(phi == .(phij)), cex = 0.7)
  text(7, 0.285, bquote(sigma^2 == .(var)), cex = 0.7)
  text(3, 0.28, bquote(mu == .(exp)), cex = 0.7)
}
par(mar = c(5,4,4,2)+0.1)             ### reset standard margins
par(mfrow = c(1,1))                   ### reset plot screen
@

\begin{figure}[h!]
\centering
<<eval=TRUE,echo=FALSE,comment=NA,fig.width=5, fig.height=5.5>>=
m<-9; csi<-0.3
########### varying pai and phi parameters 
paival<-seq(0.9,0.1,by=-0.1)
phival<-rep(c(0.05,0.1,0.3),times=3)
model<-cbind(paival,phival); nmodels<-nrow(model)
##########################################
par(mfrow = c(3,3))                   ### split the plot screen
par(mar = c(2,4,3,1)+0.1);            ### set new margins
### Probability distribution plots for jmod=1,2,...,9
for (jmod in 1:nmodels){
  paij<-model[jmod,1];
  phij<-model[jmod,2];
	prob<-probcube(m,paij,csi,phij);
	exp<-expcube(m,paij,csi,phij);
	var<-round(varcube(m,paij,csi,phij),digits=2);
	plot(1:m,prob,type="h",lwd=3,ylim=c(0,0.4),xlab = "", 
       ylab = "Pr(R=r)", las=1);
  text(2.3, 0.38, bquote(pi == .(paij)), cex = 0.7);
  text(5, 0.38, bquote(xi == .(csi)), cex = 0.7);
  text(7.8, 0.38, bquote(phi == .(phij)), cex = 0.7);
  text(7, 0.285, bquote(sigma^2 == .(var)), cex = 0.7);
  text(3, 0.28, bquote(mu == .(exp)), cex = 0.7);
}
par(mar = c(5,4,4,2)+0.1);                ### reset standard margins
par(mfrow = c(1,1));  
@
\caption{Probability distributions of \cube\, models.}\label{fig:cubeprob}
\end{figure}


In the following subsections we discuss the usage of package \pkg{CUB} by means of two real data sets, \code{univer} and \code{relgoods}, bundled within the package.  
Detailed descriptions, related sample experiment and further references can be found at \url{http://www.labstat.it/home/research/resources/cub-data-sets-2/}. 

\subsection{CUB models}
Data set \code{univer} arises from a sample survey on students' evaluation of the Orientation services that has been administed across all the Faculties of University of Naples Federico II in five waves. Participants were asked to express
their ratings on a 7 point Kikert-type scale (1 = ``very unsatisfied", 7 = ``extremely satisfied") on the following items:
\begin{itemize}
 \item \texttt{informat}: Level of satisfaction about the acquired information
 \item \texttt{willingn}: Level of satisfaction about the willingness of the staff
 \item \texttt{officeho}: Level of satisfaction about the opening hours
 \item \texttt{competen}: Level of satisfaction about the competence of the staff
 \item \texttt{global}: Level of global satisfaction
\end{itemize}
Data set collected in 2002 consists of 2179 observations: the first $7$ columns correspond to subjects covariates (for instance, the dichotomous variable \texttt{gender}, equal to 0 for men and to 1 for women, and the continuous measurement \texttt{age}).

As a first step, we show how to visualize simultaneously the ordinal variables included in the data set \code{univer} by means of the function \code{multicub()}: it fits a \cub\, model to every ordinal variable. Then each model is represented as a point in the parameter space corresponding to the obtained ML uncertainty $1-\hat{\pi}$ and feeling $1-\hat{\xi}$ estimates: indeed, estimable parameters are $\pi$ and $\xi$, but interpretation depends on the orientation of the measurement scale. If the evaluation is positive in the direction of the scale, then the actual measure of feeling is $1-\xi$;  flag arguments \code{csiplot} and \code{paiplot} may be switched to \code{TRUE} if direct parameter visulization is preferred. Notice that \code{multicub()} allows for a comparative visual analysis of vectors of rating data, possibly with different lengths and also over different numbers of categories: for this reason, they are required to be stored in a list when calling \code{multicub()} and optionally a vector \code{mvett} can be set as input argument to specify the corresponding scale lengths. If only one ordinal variable is considered, then the same visual analysis is implemented via \code{cubvisual()}.

<<eval=FALSE,echo=TRUE>>=
data(univer)
listord<-univer[,8:12] # only ratings, excluding covariates
labels<-names(univer)[8:12]
multicub(listord, labels = labels, 
    	caption ="CUB models on Univer data set", pch = 19, 
    	pos = c(1,rep(3, ncol(listord)-1)),ylim=c(0.75,1),xlim=c(0,0.4))
@

\begin{figure}[h!]
\centering
<<eval=TRUE,echo=FALSE,comment=NA,fig.width=4.5, fig.height=5>>=
data(univer)
listord<-univer[,8:12] # only ratings, excluding covariates
labels<-names(univer)[8:12]
multicub(listord, labels = labels, 
    	caption ="CUB models on Univer data set", symbols = 19,
    	pos = c(1,rep(3, ncol(listord)-1)),ylim=c(0.75,1),xlim=c(0,0.4))
@
\caption{\cub\, models without covariates for the items of \code{univer} ($m=7$).}\label{fig:multicub}
\end{figure}

%\includegraphics[height=9cm,width=9.5cm]{multicub.eps}%

From Figure \ref{fig:multicub}, we may conclude that the highest feeling has been expressed for the willingness of the staff, the lowest for the scheduled office hours. However, since the latter item is affected by the highest uncertainty, it deserves further specification; thus, hereafter we focus on the item \texttt{officeho}. 

<<cub00,eval=TRUE,echo=TRUE,fig.width=5, fig.height=5.5,comment=NA>>=
## CUB model without covariates for "officeho"
cub_00<-GEM(Formula(officeho~0|0|0), family="cub",data=univer)
summary(cub_00,digits=5)
@

The estimated \cub\, model is identified with uncertainty and feeling given by:
<<eval=TRUE,echo=TRUE,cache=TRUE,comment=NA>>=
param<-coef(cub_00,digits=3)
param
uncertainty<-1-param[1]
uncertainty
feeling<-1-param[2]
feeling
@

The following call:
<<eval=FALSE,cache=TRUE, echo=TRUE,fig.width=5, fig.height=5.5,comment=NA>>=
## CUB model without covariates
makeplot(cub_00)
@
provides a plot showing both the observed relative frequencies and the fitted probabilities, as in Figure \ref{fig:cubofficeho} (left panel).  


Accurate initial values for parameters allow to reach convergence to the ML solutions in acceptable time \citep{Ian12c}: in this regard, the EM algorithm for \cub\, models without covariates is initialized within \code{GEM(Formula,family="cub")} by means of \code{inibest(m,freq)}, a routine that returns preliminary parameter estimates of a \cub\, model for a given frequency distribution:

<<eval=TRUE,echo=TRUE,comment=NA>>=
data(univer)
freq<-tabulate(univer$officeho,nbins = 7)
ini<-inibest(m,freq) # preliminary estimates for c(pai,csi)
ini
@

Features of \code{inibest(m,freq)} can be exploited to obtain preliminary parameter estimates for any purpose. Similarly, \code{inigrid()} runs a grid search across the parameter space and it returns the parameter vectors corresponding to the maximum of log-likelihood on the given grid. \\

In order to improve the fit and interpretation of data, we introduce covariates in the model. Specifically, to explain the feeling component we consider the dichotomous covariate \code{freqserv}, indicating the usage frequency of the service with levels 0 and 1 for non-regular and regular users. 
<<cub_csi,cache=TRUE,eval=TRUE,echo=TRUE,comment=NA>>=
cub_csi<-GEM(Formula(officeho~0|freqserv|0), family="cub",data=univer)
summary(cub_csi,digits=3)
@

For the previous example, the regression coefficients for the logistic link (all of which are significant) can be retrieved by:

<<cache=TRUE,eval=TRUE,echo=TRUE,comment=NA>>=
gama<-coef(cub_csi)[2:3]
gama
gama0<-gama[1]  ## intercept term
gama1<-gama[2]
@

Then, to compute the feeling parameter corresponding to non-regular and regular users according to \eqref{paicsilogis}, respectively, it is possible to run:
<<eval=TRUE,echo=TRUE>>=
csi_nru <- logis(0, gama)   ## csi parameter for non regular user (freqserv=0)
csi_nru
csi_ru  <- logis(1, gama)   ## csi parameter for regular user (freqserv=1)
csi_ru
@
where the routine \code{logis()} returns the logistic transform componentwise of a standard matrix (binding the array $Y$ of covariates with a vector of ones to include an intercept term, placed in the first column) multiplied with the parameter vector \code{param}.
In conclusion, the feeling component is specified via:
\[
\begin{array}{lcl}
logit(1-\xi_i)&=&   -\gamma_0  - \gamma_1\,freqserv_i, \qquad i=1,\dots,n.
\end{array}
\]
In addition to a likelihood ratio test obtained by comparing the log-likelihoods of the two models, we remark that the reduction in BIC index from \code{BIC(cub_00)}$= 7535.208$  to \code{BIC(cub_csi)}$=7431.773$ strongly supports the inclusion of the covariate \code{freqserv} in the model. For fitting purposes it is also possible to compute the $X^2$ statistic of Pearson by implementing:

<<cache=TRUE,echo=TRUE,eval=TRUE,comment=NA>>=
pai<-coef(cub_csi)[1]
gama<-coef(cub_csi)[2:3]
data(univer)
pearson<-chi2cub(m=7, univer$officeho, W = univer$freqserv, pai, gama)
str(pearson)
@

Furthermore, the call to \code{makeplot(cub_csi)} produces a plot comparing the two fitted probability distributions conditional on the values of the dichotomous covariate, as shown by Figure \ref{fig:cubofficeho} (right panel).
\begin{center}
\begin{figure}[h!]
\begin{minipage}{0.5\textwidth}
\centering
%\includegraphics[height=7.5cm,width=7.5cm]{figb.eps}%
<<cache=TRUE,eval=TRUE,echo=FALSE,fig.width=5, fig.height=5.5,comment=NA>>=
makeplot(cub_00)
@
\end{minipage}%
\begin{minipage}{0.5\textwidth}
\centering
<<cache=TRUE,eval=TRUE,echo=FALSE,fig.width=5, fig.height=5.5,comment=NA>>=
makeplot(cub_csi)
@
%\includegraphics[height=7.5cm,width=7.5cm]{figcovcsi.jpeg}%
\end{minipage}
\caption{\cub\, without and with dichotomous covariate \texttt{freqserv} for feeling on \texttt{officeho}.}\label{fig:cubofficeho}
\end{figure}
\end{center}

Note that the (normalized) dissimilarity index between observed and fitted probability distribution for a given fitted model without covariate is reported in the main title of the corresponding plot (see Section \ref{sec:extra} for more details).

When covariates are specified for the feeling component, optimal preliminary estimates of the corresponding coefficients \citep{Ian08} are implemented via \code{inibestgama(m, ordinal, W)}, which is the option set to initialize the EM algorithm when calling \linebreak \code{GEM(Formula~0|W|0,family="cub")}:
<<cache=TRUE,eval=TRUE,echo=TRUE,comment=NA>>=
data(univer)
inicsicov<-inibestgama(m,univer$officeho,W=univer$freqserv)
inicsicov
@

We conclude this subsection by briefly presenting a case study in which a continuous covariate and a dichotomous one are jointly considered: in particular, we include the deviation from the mean of the logarithmic transform of \code{Age}, that is the covariate \texttt{lage}, and the covariate \texttt{gender} to explain both feeling and uncertainty.
<<cub_pai_csi,cache=TRUE,eval=TRUE,echo=TRUE,comment=NA>>=
data(univer)
age<-univer$age
lage<-log(age)-mean(log(age))         # Deviation from mean of logged Age
cub_pai_csi<-GEM(Formula(officeho~lage+gender|lage+freqserv|0),family="cub",data=univer)
summary(cub_pai_csi,correlation=TRUE,digits=3)
@

When covariates are included for both parameters as in the previous example, the estimates will be the corresponding coefficients of the logistic regression for the uncertainty and the feeling components:
<<eval=TRUE,echo=TRUE>>=
coef(cub_pai_csi,digits=3)
@
%' <<eval=TRUE,echo=FALSE>>=
%' m<-7
%' data<-univer
%' age<-univer[,3]
%' lage<-log(age)-mean(log(age)) #Deviation from mean of logged Age
%' gender<-univer[,4]                           
%' Y<-cbind(lage, gender)
%' W<-cbind(lage, freqserv)
%' cub_pai_csi<-GEM(Formula(ordinal~Y|W|0),family="cub")
%' coef(cub_pai_csi)
%' @
Then, the resulting \cub\, model can be summarized by:
\[
\left\{\begin{array}{lcl}
logit(1-\pi_i)&=&   -\beta_0  - \beta_1\,lage_i  -\beta_2\,gender_i \\
logit(1-\xi_i)&=&   -\gamma_0  -\gamma_1\,lage_i -  \gamma_2\,freqserv_i.
\end{array}
\right.
\]

Since no plot is directly provided as output, the performance of the \cub\, model with significant covariates on feeling and uncertainty parameters may be summarized as in Figure \ref{fig:loggedAge}, obtained by the following code:

<<eval=FALSE,echo=TRUE,cache=TRUE,comment=NA>>=
data(univer)
age<-univer$age
average<-mean(log(age))
ageseq<-log(seq(17, 51, by = 0.1))-average
param<-coef(cub_pai_csi)
####################
paicov0<-logis(cbind(ageseq, 0), param[1:3])
paicov1<-logis(cbind(ageseq, 1), param[1:3])

csicov0<-logis(cbind(ageseq, 0), param[4:6])
csicov1<-logis(cbind(ageseq, 1), param[4:6])
####################
plot(1-paicov0, 1-csicov0, type = "n", col = "blue", cex = 1,
     xlim = c(0, 0.6), ylim = c(0.4, 0.9), font.main = 4, las = 1,
     main = "CUB models with covariates",
     xlab = expression(paste("Uncertainty     ", (1-pi))),
     ylab = expression(paste("Feeling      ", (1-xi))), cex.main = 0.9, 
     cex.lab = 0.9)
lines(1-paicov1, 1-csicov1, lty = 1, lwd = 4, col = "red")
lines(1-paicov0, 1-csicov0, lty = 1, lwd = 4, col = "blue")
lines(1-paicov0, 1-csicov1, lty = 1, lwd = 4, col = "black")
lines(1-paicov1, 1-csicov0, lty = 1, lwd = 4, col = "green")
legend("bottomleft", legend = c("Man-User", "Man-Not User", 
   "Woman-User", "Woman-Not User"), col = c("black", "blue", "red", "green"),
   lty = 1, text.col = c("black", "blue", "red", "green"), cex = 0.6)
text(0.1, 0.85, labels = "Young", offset = 0.3, cex = 0.8, font = 4)
text(0.5, 0.5, labels = "Elderly", offset = 0.3, cex = 0.8, font = 4)
@


\begin{figure}[h!]
\centering
<<eval=TRUE,echo=FALSE,fig.width=4.5,fig.height=4.5,comment=NA>>=
average<-mean(log(age))
ageseq<-log(seq(17, 51, by = 0.1))-average;
param<-coef(cub_pai_csi)
####################
paicov0<-logis(cbind(ageseq, 0), param[1:3])
paicov1<-logis(cbind(ageseq, 1), param[1:3])

csicov0<-logis(cbind(ageseq, 0), param[4:6])
csicov1<-logis(cbind(ageseq, 1), param[4:6])
####################
plot(1-paicov0, 1-csicov0, type = "n", col = "blue", cex = 1,
     xlim = c(0, 0.6), ylim =  c(0.5, 1), font.main = 4, las = 1,
     main = "CUB models with covariates",
     xlab = expression(paste("Uncertainty     ", (1-pi))),
     ylab = expression(paste("Feeling      ", (1-xi))), cex.main = 0.9, 
     cex.lab = 0.9)
lines(1-paicov1, 1-csicov1, lty = 1, lwd = 4, col = "red")
lines(1-paicov0, 1-csicov0, lty = 1, lwd = 4, col = "blue")
lines(1-paicov0, 1-csicov1, lty = 1, lwd = 4, col = "black")
lines(1-paicov1, 1-csicov0, lty = 1, lwd = 4, col = "green")
legend("bottomleft", legend = c("Man-User", "Man-Not User", 
   "Woman-User", "Woman-Not User"), col = c("black", "blue", "red", "green"),
   lty = 1, text.col = c("black", "blue", "red", "green"), cex = 0.6)
text(0.08, 0.95, labels = "Young", offset = 0.3, cex = 0.8, font = 4)
text(0.4, 0.7, labels = "Elderly", offset = 0.3, cex = 0.8, font = 4)
@
\caption{\cub\, models with covariates: logged \texttt{age} and \texttt{gender} for uncertainty and logged age and \texttt{freqserv} for feeling.}
\label{fig:loggedAge}
\end{figure}

Summarizing, the sketch of analysis so pursued indicates that satisfaction increase with age whereas indecision decreases, and that men are more satisfied than women across all profiles.

\subsection{Comparing IHG and CUBE models}
As shown in Figure \ref{fig:cubeprob}, \cube\, models offer a wide flexibility in fitting data having different shapes and features. 
We now discuss how to perform an analysis on ordinal data with a \cube\, model by considering the variable \texttt{willingn} within \code{data(univer)}. 
<<cube,cache=TRUE,eval=TRUE,echo=TRUE,comment=NA>>=
starting<-c(0.5, 0.5, 0.1)       
cubefit<-GEM(Formula(willingn~0|0|0),family="cube", starting = starting, 
             maxiter = 100, toler = 1e-4,data=univer)
summary(cubefit,digits=7)
@

As shown with the current example, the user can set some extra options for the estimation procedure: the maximum number of iterations allowed for the optimization procedure (the default value for \cube\, model is \code{maxiter = 1000}), the error tolerance to stop iterations (by changing the default option \code{toler = 1e-6}), and most importantly the argument \code{starting}, that is the vector of starting values to initialize the EM algorithm. The default option is based on the routine \code{inibestcube()}, a convenient choice of preliminary estimates for parameters in \cube\, models. For \cube\, models with covariates, only for feeling or for all the three parameters, initial estimates of parameters are implemented via \code{inibestcubecsi()} and \code{inibestcubecov()}, which are also set as default options when calling
<<eval=FALSE,echo=TRUE,comment=NA>>=
GEM(Formula(ordinal~0|W|0),family="cube")
@
and
<<eval=FALSE,echo=TRUE,comment=NA>>=
GEM(Formula(ordinal~Y|W|Z),family="cube")
@
respectively.

Given the shape of the distribution of the selected ordinal data (see Figure \ref{fig:cubeihg}, left panel), the low level of uncertainty (\code{1-coef(cubefit)[1]}$=0.1174$) and in particular, the extreme feeling \linebreak (\code{1-coef(cubefit)[2]}$=0.8752$) corresponding to the modal value at the last category $m=7$, we check if the more parsimonious \ihg model provides comparable goodness of fit. For such a model, the ML estimation procedure is initialized by considering as starting value the moment estimate of the preference parameter $\theta$, computed via the function \code{iniihg(m,freq)}. 
<<ihg,cache=TRUE,eval=TRUE,echo=TRUE,comment=NA>>=
ihgfit<-GEM(Formula(willingn~0),family="ihg",data=univer)
summary(ihgfit,digits=7)
@

In order to check if the fit improvement obtained with \cube\, models justifies the inclusion of two additional parameters, that is, if the more parsimonious \ihg model should be discarded in favor of \cube\, model, we perform a likelihood ratio test:  

<<cache=TRUE,eval=TRUE,echo=TRUE,comment=NA>>=
llcube<-logLik(cubefit)
llihg<-logLik(ihgfit)
lrt<- -2*(llihg - llcube)   ### 495.9135
pv<- 1-pchisq(lrt, 2)       ### 0
@

It confirms that a consistently better fit is ensured by the \cube\, model. Figure \ref{fig:cubeihg} displays the observed and fitted probability distributions for both models as returned by \code{makeplot(cubefit)} and \code{makeplot(ihgfit)}, respectively.

\begin{figure}[h!]
 \begin{minipage}{0.5\textwidth}
 <<eval=TRUE,echo=FALSE,cache=TRUE,comment=NA>>=
 data(univer)
starting<-c(0.5, 0.5, 0.1)
 cubefit<-GEM(Formula(willingn~0|0|0),family="cube", starting = starting, maxiter = 100, toler = 1e-4,data=univer)
 makeplot(cubefit)
 @
\end{minipage}%
 \begin{minipage}{0.5\textwidth}
 <<eval=TRUE,echo=FALSE,cache=TRUE,comment=NA>>=
 ihgfit<-GEM(Formula(willingn~0),family="ihg",data=univer)
 makeplot(ihgfit)
 @
 \end{minipage}
 \caption{\cube\, (left panel) and \ihg (right panel) fit to the data.}
\label{fig:cubeihg}
\end{figure}

\subsection{Models for shelter effect}
As already mentioned in Section \ref{sec:cub}, the class of \cub\, mixture models includes a specific extension to fit the so-called \textit{shelter effect}, arising in presence of an inflated category. We show how to perform the analysis of a possible \textit{shelter effect} on the data set \code{data(relgoods)},  a survey designed to assess importance of relational goods and involvement in leisure time activities: data were collected in December 2014 in the metropolitan area of Naples, Italy. Every participant was asked to measure his/her personal score for relational goods (for instance, time dedicated
to friends and family) and his/her preferences towards leisure activities on a $10$
point ordinal scale, ranging from 1 = ``Not at all, nothing, never", to 10 = ``Totally, extremely important, always". On the whole, respondents gave rating scores to $34$ items. 
The sample is composed by $2459$ interviews and $16$ subjects' covariates (including gender, education, residence, marital status and age, among others).  For these data some missing values are present.

Here, the implementation of the so-called \textit{shelter effect} within \cub\, models is discussed. Let us consider the ordinal variable \texttt{Writing}, indicating the respondents' degree of engagement in writing as a preferred activity for leisure time. 

% \texttt{Drawing}, indicating the respondents' degree of engagement in drawing as a preferred activity for leisure time. 

One observes an excess in frequency corresponding to the first category, so we first fit an extended \cub\, model with \textit{shelter effect} by running the following code (notice that both the available parameterizations \eqref{shelter1} and \eqref{shelter2} are reported on screen):

<<cub_she,eval=TRUE,echo=TRUE,comment=NA>>=
cub_she<-GEM(Formula(Writing~0|0|0),family="cub", shelter = 1,
             maxiter=500,toler=1e-3,data=relgoods) 
summary(cub_she)
@

As shown by the printed output, the observed distribution is affected by a low weight for the shifted Binomial component (\code{pai1<-coef(cub_she)[1]}), and a moderately high weight for the Uniform component (\code{pai2<-coef(cub_she)[2]}): Figure \ref{fig:cubshe_cush} (left panel) shows the plot returned by running \code{makeplot(cub_she)}, which compares the observed frequencies and the fitted probabilities.


As a matter of fact the data set under consideration includes several instances of distributions characterized by a huge heterogeneity and an inflated category. As already explained in Section \ref{sec:cub}, such occurrence motivated the specification of \cush models. For the variable \texttt{Writing} here considered, the fit of a \cush model is implemented by the code: 

<<cush,cache=TRUE,eval=TRUE,echo=TRUE,comment=NA>>=
cush<-GEM(Formula(Writing~0),family="cush",shelter = 1,data=relgoods) 
summary(cush,digits=3)
@

Summarizing, \cub\, model with \textit{shelter effect} provides a better fit in terms of log-likelihood and BIC indexes. 
As for other models within the \gem family, \code{makeplot(cush)} compares fitted and observed frequency distributions as displayed in Figure \ref{fig:cubshe_cush}  (right panel).

\begin{figure}[h!]
\begin{minipage}{0.5\textwidth}
<<eval=TRUE,echo=FALSE,comment=NA>>=
data(relgoods)
cub_she<-GEM(Formula(Writing~0|0|0),family="cub", shelter = 1,
             maxiter=500,toler=1e-3,data=relgoods) 
makeplot(cub_she)
@
%\includegraphics[width=7cm,height=7.5cm]{cube_will.eps}%
\end{minipage}%
\begin{minipage}{0.5\textwidth}
<<eval=TRUE,echo=FALSE,comment=NA>>=
cush<-GEM(Formula(Writing~0),family="cush",shelter = 1,data=relgoods) 
makeplot(cush)
@
%\includegraphics[width=7cm,height=7.5cm]{ihg_will.eps}%
\end{minipage}
\caption{\cub\, with \textit{shelter} (left panel) and \cush (right panel) fit to the data.}
\label{fig:cubshe_cush}
\end{figure}


The possibility of including covariates to explain the \textit{shelter effect} in a \cush model is presented in the next section along with the simulation routines offered by package \pkg{CUB}.


\subsection{Simulation routines and experiments}
Package \pkg{CUB} offers several facilities to perform  simulation experiments involving \gem models. In order to generate $n$ pseudo-random numbers from \cub, \cube, \cub\, with \textit{shelter}, \cush and \ihg distributions, respectively, the following functions are available:
<<eval=FALSE,echo=TRUE,comment=NA>>=
simCUB   <- simcub(n, m, pai, csi)
simCUBE  <- simcube(n, m, pai, csi, phi)
simCUBshe <- simcubshe(n, m, pai, csi, delta, shelter)
simCUSH  <- simcush(n, m, delta, shelter)
simIHG   <- simihg(n, m, theta)
@
For instance, consider the following example, where the theoretical and fitted probability distributions of a \cub\, model are displayed for simulated data in Figure \ref{fig:simcub}:
<<eval=FALSE,echo=TRUE,comment=NA>>=
m<-9; n<-500
pai<-0.7; csi<-0.2
pr<-probcub00(m, pai, csi)
set.seed(123)
ordinal<-simcub(n, m, pai, csi)
cub<-GEM(Formula(ordinal~0|0|0),family="cub", maxiter = 50, toler = 1e-4)
pr_est<-fitted(cub)[,1]
plot(1:m, pr, type = "h", xlab = "Ordinal categories", 
     ylab = "Probability", lwd = 3, ylim = c(0, 0.3))
vett<-1:m + 0.2
lines(vett, pr_est, type = "h", col = "blue", lwd = 3, lty = 2)
legend(1, 0.3, legend = c("Theoretical", "Fitted"), col = c("black", "blue"),
       lty = c(1, 2), lwd = 3, text.col = c("black", "blue"), bty = "n")
@ 

\begin{figure}[h!]
\centering
<<eval=TRUE,echo=FALSE,fig.height=3.8,fig.width=3.8,comment=NA>>=
m<-9; n<-500
pai<-0.7; csi<-0.2
pr<-probcub00(m, pai, csi)
ordinal<-simcub(n, m, pai, csi)
cub<-GEM(Formula(ordinal~0|0|0),family="cub", maxiter = 50, toler = 1e-4)
est<-coef(cub)
est
pr_est<-fitted(cub)[,1]
plot(1:m, pr, type = "h", xlab = "Ordinal categories", 
     ylab = "Probability", lwd = 3, ylim = c(0, 0.3))
vett<-1:m + 0.2
lines(vett, pr_est, type = "h", col = "blue", lwd = 3, lty = 2)
legend(1, 0.3, legend = c("Theoretical", "Fitted"), col = c("black", "blue"),
       lty = c(1, 2), lwd = 3, text.col = c("black", "blue"), bty = "n")
@
%\includegraphics[width=7cm, height=7.5cm]{simcub.eps}
\caption{Fitted and theoretical \cub\, distributions on simulated data.}\label{fig:simcub}
\end{figure}



In order to simulate observations from a model with covariates, a two-step approach has to be implemented: one should first obtain the parameters $(\pi_i, \xi_i, \ldots)$ corresponding to the chosen respondents' profiles
and then generate the sample data by using corresponding simulation routines with the given parameters. We outline how to implement such a procedure for a \cush model with a dichotomous covariate for the \textit{shelter effect}. Figure \ref{fig:cushsimul} displayes the two \cush distributions conditional to the value of the covariate.
<<eval=FALSE,echo=TRUE,comment=NA>>=
   omega0<- -1.5
   omega1<- -2
   delta0<-as.numeric(logis(0, c(omega0, omega1)))  ## 0.1824255
   delta1<-as.numeric(logis(1, c(omega0, omega1)))  ## 0.0293122
   m<-9
   n0<-700
   n1<-1300
   set.seed(1234)
   ord0<-simcush(n0, m, delta0, shelter = s)
   ord1<-simcush(n1, m, delta1, shelter = s)
   ordinal<-c(ord0, ord1)
   X<-c(rep(0, n0), rep(1, n1))
   cushcov<-GEM(Formula(ordinal~X),family="cush",shelter = m)
   coef(cushcov)
   makeplot(cushcov)
@

\begin{figure}[h!]
<<eval=TRUE,echo=FALSE,fig.height=4,fig.width=4,comment=NA>>=
omega0<- -1.5
omega1<- -2
delta0<-as.numeric(logis(0, c(omega0, omega1)))  ## 0.1824255
delta1<-as.numeric(logis(1, c(omega0, omega1)))  ## 0.0293122
m<-9
n0<-700
n1<-1300
s<-m          # shelter category
ord0<-simcush(n0, m, delta0, shelter = s)
ord1<-simcush(n1, m, delta1, shelter = s)
ordinal<-c(ord0, ord1)
X<-c(rep(0, n0), rep(1, n1))
cushcov<-GEM(Formula(ordinal~X), family="cush", shelter = s)
coef(cushcov)
makeplot(cushcov)
@
%\includegraphics[width=7cm,height=7.5cm]{cush_simul.eps}
\caption{\cush distributions with a dichotomous covariate.}\label{fig:cushsimul}
\end{figure}


\subsection{Extra features of CUB package}\label{sec:extra}
The R package \pkg{CUB} provides a flexible framework for the analysis of ordinal data that covers some of the most common distributions related to \gem models. For a given discrete probability distribution, some additional features concern, for instance, the functions \code{gini()} and \code{laakso()}, for the normalized \cite{Gini1912} and \cite{Laakso89} heterogeneity indexes, respectively, and the function \code{deltaprob(prob)} for the Mean Difference index (according to the \cite{DeFinetti1930} formulation): Figure \ref{fig:ginilaakso} shows an example of usage.
<<eval=FALSE,echo=TRUE,comment=NA>>=
m<-7
pai<-0.4
csi<-0.2
prob<-probcub00(m, pai, csi)
Giniindex<-round(gini(prob), digits = 3)
Laaksoindex<-round(laakso(prob), digits = 3)
Delta<-round(deltaprob(prob), digits = 3)
plot(1:m, prob, type = "n", ylab = "", xlab = "", ylim = c(0, 0.4), las = 1)
lines(1:m, prob, type = "h", lwd = 3)
legend("topleft", xjust = 1, legend = c(paste("Gini       =", Giniindex),
                  paste("Laakso  =", Laaksoindex),
                  paste("Delta     =", Delta)), cex = 0.8)
@
\begin{figure}[h!]
\centering
<<eval=TRUE,echo=FALSE,comment=NA,fig.height=4,fig.width=4>>=
m<-7
pai<-0.4
csi<-0.2
prob<-probcub00(m, pai, csi)
Giniindex<-round(gini(prob), digits = 3)
Laaksoindex<-round(laakso(prob), digits = 3)
Delta<-round(deltaprob(prob), digits = 3)
plot(1:m, prob, type = "h", ylab = "", xlab = "", ylim = c(0, 0.4), las = 1)
legend("topleft", xjust = 1, legend = c(paste("Gini       =", Giniindex),
                  paste("Laakso  =", Laaksoindex),
                  paste("Delta     =", Delta)), cex = 0.8)
@
\caption{Heterogeneity and mutual variability indexes.}\label{fig:ginilaakso}
\end{figure}


Another example concerns the simulation of the critical values for the normalized dissimilarity index between observed relative frequencies $(f_1,\dots,f_m)$ and estimated (theoretical) probabilities $(p_1(\hat{\bm \theta}),\dots,p_m(\hat{\bm \theta}))$:
$$ Diss = \dfrac{1}{2}\sum_{r=1}^m |f_r - p_r(\hat{\theta})|,$$
which is computed by the routine \code{dissim()}. This index evaluates the distance of the estimated model to a perfect fit. Due to its importance in model performance, this dissimilarity measure is computed when fitting \cub\, models and extensions within \pkg{CUB} package and displayed also in the plot title when it is returned. For this index we show how to run a simulation experiment for detecting the critical value for an assigned \cub\, model. The code concerns the distribution of a \cub\, model with $\pi=0.3$ and $\xi=0.8$. Figure \ref{fig:dissim} displays the results after \code{nsimul}$=10000$ simulations of samples of sizes $n=300$ over $m=7$ categories.

<<eval=FALSE,echo=TRUE,comment=NA>>=
  pai<-0.3; csi<-0.8
  nsimul<-10000
  n<-300; m<-7
  vectdiss<-rep(NA, nsimul)
  for (j in 1:nsimul){
    ordinal<-simcub(n, m, pai, csi)
    mod<-GEM(Formula(ordinal~0|0|0),family="cub")
    theor<-fitted(mod)[,1]
    freq<-tabulate(ordinal, nbins = m)/n
    vectdiss[j]<-dissim(freq, theor)
  }
  
  sortvect<-sort(vectdiss)
  alpha<-0.05
  signif<-sortvect[(1-alpha)*nsimul] # empirical percentile 0.05
  cr<-vectdiss[vectdiss>signif]       # critical region
  
  effe<-density(vectdiss)
  band<-effe$bw     # band width of the kernel plot
  f<-function(x){   # compute kernel density (the default is Gaussian kernel)
    (1/(band*length(vectdiss)))*sum(dnorm((x-vectdiss)/band))
  }
  sortcr<-sort(cr)
  sup<-numeric(length(cr))
  for (j in 1:length(cr)){
    sup[j]=f(sortcr[j])    # compute kernel density values for critical values
  }
  title <- paste("Normalized Dissimilarity distribution, Critical level(0.05) = ",
                round(signif, 3))
  plot(density(vectdiss), main = title, cex.main = 0.7, lwd = 3, xlab = "", 
       cex.main = 0.7, las = 1)
  polygon(c(signif, sortcr, sortcr[length(sortcr)], signif), c(0, sup, 0, 0),
          col = "gray")
  abline(h=0)
@
\begin{figure}[h!]
<<eval=TRUE,echo=FALSE,fig.width=5,fig.height=5,comment=NA>>=
  pai<-0.3; csi<-0.8
  nsimul<-10000
  n<-300; m<-7
  vectdiss<-rep(NA, nsimul)
  for (j in 1:nsimul){
    ordinal<-simcub(n, m, pai, csi)
    mod<-GEM(Formula(ordinal~0|0|0),family="cub")
    paiest<-coef(mod)[1]
    csiest<-coef(mod)[2]
    theor<-fitted(mod)[,1]
    freq<-tabulate(ordinal, nbins = m)/n
    vectdiss[j]<-dissim(freq, theor)
  }
  
  sortvect<-sort(vectdiss)
  alpha<-0.05
  signif<-sortvect[(1-alpha)*nsimul] # empirical percentile 0.05
  cr=vectdiss[vectdiss>signif]       # critical region
  
  effe<-density(vectdiss)
  band<-effe$bw     # band width of the kernel plot
  f<-function(x){   # compute kernel density (the default is Gaussian kernel)
    (1/(band*length(vectdiss)))*sum(dnorm((x-vectdiss)/band))
  }
  sortcr<-sort(cr)
  sup<-numeric(length(cr))
  for (j in 1:length(cr)){
    sup[j]=f(sortcr[j])    # compute kernel density values for critical values
  }
 title = paste("Normalized Dissimilarity distribution \n Critical level(0.05) = ", round(signif, 3))
  plot(density(vectdiss), main = title, cex.main = 0.7, lwd = 3, xlab = "", 
       cex.main = 0.7, las = 1)
  polygon(c(signif, sortcr, sortcr[length(sortcr)], signif), c(0, sup, 0, 0),
          col = "gray")
  abline(h=0)
@  
  \caption{Distribution of the (normalized) dissimilarity index for simulated \cub\, model with $\pi=0.3$ and $\xi=0.8$.}\label{fig:dissim}
  \end{figure}
  
  
We conclude the presentation of package \pkg{CUB} by underlying the effective graphical interpretation of parameters allowed by \cub\, models. When covariates are included to explain feeling and uncertainty, a \textit{Scatter of Parameter
Estimates} (SPE, for short) \citep{StatAppli} reveals to be a convincing plotting device to identify different response patterns across subsets of respondents. A SPE simply performs the multiple representation of a \cub\, model as a point in the parameter space for each parameter vector $(\pi_i,\xi_i)$ associated with each respondent. We show an example based on the \code{univer} dataset, by showing the \cub\, model for the \code{global} satisfaction conditioning both feeling and uncertainty on the evaluation expressed for \code{officeho}, and further specifying both components for varying age. 

<<eval=FALSE,echo=TRUE,comment=NA>>=
data(univer)
ordinal<-univer$global
lage<-log(univer$age)-mean(log(univer$age)) #Deviation from mean of logged Age
Y<-W<-cbind(univer$officeho,lage)
cub_pai_csi<-GEM(Formula(global~Y|W|0),family="cub",data=univer)
bet<-coef(cub_pai_csi)[1:3]
gama<-coef(cub_pai_csi)[4:6]
paivett<-logis(Y,bet)
csivett<-logis(W,gama)
n<-length(ordinal)
main<- "Scatter plot of estimated parameters"
vettcol<-symb<-rep(NA,n)
vettcol[univer$officeho<=3]<-"red"
vettcol[univer$officeho==4]<-"black"
vettcol[univer$officeho>=5]<-"blue"
  
symb[univer$officeho<=3]<-0
symb[univer$officeho==4]<-1
symb[univer$officeho>=5]<-2
  
plot(1-paivett,1-csivett,xlim=c(0,1),ylim=c(0,1),
     cex=0.8,pch=symb,col=vettcol,
     xlab=expression(1-pi),ylab=expression(1-xi),
     main=main,cex.main=1,font.main=4)
legend("topright",legend=c("Unsatisfied","Indifferent","Satisfied"),
       cex=0.7,text.col=c("red","black","blue"),pch=c(0,1,2),
       col=c("red","black","blue"))
@  

\begin{figure}[h!]
<<eval=TRUE,echo=FALSE,fig.width=4.5,fig.height=4.5,comment=NA>>=
data(univer)
ordinal<-univer$global
lage<-log(univer$age)-mean(log(univer$age)) #Deviation from mean of logged Age
Y<-W<-cbind(univer$officeho,lage)
cub_pai_csi<-GEM(Formula(global~Y|W|0),family="cub",data=univer)
bet<-coef(cub_pai_csi)[1:3]
gama<-coef(cub_pai_csi)[4:6]
paivett<-logis(Y,bet)
csivett<-logis(W,gama)
n<-length(ordinal)
caption<- "Scatter plot of estimated parameters"
vettcol<-symb<-rep(NA,n)
vettcol[univer$officeho<=3]<-"red"
vettcol[univer$officeho==4]<-"black"
vettcol[univer$officeho>=5]<-"blue"
  
symb[univer$officeho<=3]<-0
symb[univer$officeho==4]<-1
symb[univer$officeho>=5]<-2
  
plot(1-paivett,1-csivett,xlim=c(0,1),ylim=c(0,1),
     cex=0.8,pch=symb,col=vettcol,
     xlab=expression(1-pi),ylab=expression(1-xi),
     main=caption,cex.main=1,font.main=4)
legend("topright",legend=c("Unsatisfied","Indifferent","Satisfied"),
       cex=0.7,text.col=c("red","black","blue"),pch=c(0,1,2),
       col=c("red","black","blue"))

@  
\end{figure}


  
\section{Conclusions}
Package \pkg{CUB} is under active development. Future plans include extra functions to fit \cube\, models with further covariate specification, as well as higher flexibility for \gecub models to possibly include covariates only for \emph{shelter effect} or any pair of components. Implementation of Hierarchical \cub\, models and other extensions, including an adjusted version of \cub models to account for response styles and a multivariate model for repeated measurements are under scrutiny. 

Package \pkg{CUB} has been implemented under R Version 3.2.5. 
% All \proglang{R} code used in this paper is also available in a source code file XXX.R together with the paper.
%\vskip{5cm}
%\section*{Acknowledgements}
%This work has been supported by SHAPE project within the frame of Programme STAR (CUP E68C13000020003) at University of Naples Federico II, financially supported by UniNA and Compagnia di San Paolo.

\nocite*{}
\bibliographystyle{plain}

%\bibliography{bibliojss}
\begin{thebibliography}{99}

\bibitem[Agresti(2010)]{Agr10}
Agresti A. (2010). \emph{Analysis of Ordinal Categorical Data}, $2^{nd}$ edition. J.Wiley \& Sons, Hoboken.

\bibitem[Akaike(1974)]{Akaike1974}
Akaike H. (1974). A New Look at the Statistical Model Identification. \emph{IEEE Transactions on
Automatic Control}, \textbf{19}(6), 716--723.

\bibitem[Andreis and Ferrari(2013)]{AndreisFerrari2013}
Andreis F., Ferrari P.A. (2013). On a copula model with \cub margins. \emph{Quaderni di Statistica}, \textbf{15}(1), 33--51.

\bibitem[Balirano and Corduas(2008)]{Balirano2008}
Balirano G., Corduas M. (2008). Detecting Semiotically Expressed Humor in Diasporic TV Productions. \emph{Humor}, \textbf{21}(3), 227--251.

\bibitem[Bodzogan(1990)]{Bodzogan1990}
Bodzogan H. (1990). On the information-based measure of covariance complexity and its
application to the evaluation of multivariate linear models. \emph{Communications in Statistics. Theory and Methods}, \textbf{19}(1), 221--278.

\bibitem[Capecchi \emph{et al.}(2015)]{CapEndrizzi}
Capecchi S., Endrizzi I., Gasperi F. and Piccolo D. (2015). A multi-product approach for detecting subjects' and objects' covariates in consumer preferences. \emph{British Food Journal}, \textbf{118}(3), 515--526.

\bibitem[Capecchi and Iannario(2016)]{CapecchiIannario2016}
Capecchi S., Iannario M. (2016). Gini heterogeneity index for detecting uncertainty in ordinal data surveys. \emph{Metron}, \textbf{74}(2), 223--232.

\bibitem[Capecchi and Piccolo(2014)]{CapecchiPiccolo2014}
Capecchi S., Piccolo D. (2014). Modelling the latent components of personal happiness, in: Perna and Sibillo (eds.): \emph{Mathematical and Statistical Methods for Actuarial  Sciences and Finance}, Springer Verlag, Berlin, pp.49--52.

\bibitem[Capecchi and Piccolo(2016)]{CapecchiPiccolo2016}
Capecchi S., Piccolo D. (2016). Dealing with heterogeneity in ordinal responses. \emph{Quality \& Quantity}, \textbf{51}(5), 2375--2393.

\bibitem[Capecchi \textit{et al.}(2017)]{CapecchiPiccoloSimone2016}
Capecchi S., Piccolo D. and Simone R. (2017), An inflated model to account for large heterogeneity in ordinal data. In: \emph{Data Science, Innovative Developments in Data Analysis and Clustering}, IFCS Proceedings, 205--217.

\bibitem[Christensen(2015)]{Christensen2015}
Christensen R.H.B. (2015). Package `ordinal'. Regression Models for Ordinal Data. \url{http://CRAN.R-project.org/package=ordinal}.

\bibitem[Colombi and Giordano(2016)]{ColombiGiordano2015}
Colombi R., Giordano S. (2016). A class of mixture models for multidimensional
ordinal data, \emph{Statistical Modelling: an International Journal}, \textbf{16}(4), 322--340.

\bibitem[Corduas(2015)]{Corduas2015}
Corduas M. (2015). Analyzing bivariate ordinal data with \cub\, margins. \emph{Statistical Modelling: an International Journal}, \textbf{15}(5), 411--432.

\bibitem[Corduas \textit{et al.}(2009)]{Corduas2009}
Corduas M., Iannario M. and Piccolo D. (2009). A class of statistical models for evaluating services and performances, in: M. Bini \emph{et al.} (eds.): \emph{Statistical methods for the evaluation of educational services and quality of products}, Contribution to Statistics (pp.99--117). Berlin Heidelberg: Physica-Verlag, Springer.

\bibitem[D'Elia(2003)]{Delia2003}
D'Elia A. (2003). Modelling ranks using the Inverse Hypergeometric distribution. \emph{Statistical Modelling: an International Journal}, \textbf{3}(1), 65--78.

\bibitem[D'Elia(2008)]{Delia2008}
D'Elia A. (2008). A statistical modelling approach for the analysis of TMD chronic pain data. \emph{Statistical Methods in Medical Research}, \textbf{17}(4), 389--403.

\bibitem[D'Elia and Piccolo(2005)]{DelPicc2005}
D'Elia A., Piccolo D. (2005). A mixture model for preference data analysis. \emph{Computational Statistics \& Data Analysis}, \textbf{49}(3), 917--934.

\bibitem[de Finetti and Paciello (1930)]{DeFinetti1930}
de Finetti B., Paciello U. (1930), Calcolo della differenza media, 
\emph{Metron}, \textbf{8}, 89--94.

\bibitem[Gambacorta and Iannario (2013)]{Gamba13}
Gambacorta R., Iannario M. (2013). Measuring job satisfaction with CUB models. \emph{Labour},\textbf{ 27}(2), 198--224.

\bibitem[Gambacorta \textit{et al.}(2014)]{Gamba14}
Gambacorta R., Iannario M., Vaillant R. (2014). Design-based inference in a mixture model for ordinal variables for a two stage stratified design. \emph{Australian and New Zealand Journal of Statistics}, \textbf{56}(2), 125--143.

\bibitem[Gini(1912)]{Gini1912}
Gini, C. (1912). \emph{Variabilit\`{a} e mutabilit\`{a}}. Studi economico-giuridici, Facolt\`{a} di Giurisprudenza, Universit\`{a} di Cagliari, A, III, parte II.

\bibitem[Gottard \textit{et al.}(2016)]{Gott2016}
Gottard A., Iannario M., Piccolo D. (2016). Varying uncertainty in \cub\, models. \emph{Advances in Data Analysis and Classification}, \textbf{10}(2), 225--244.

\bibitem[Greene and Hensher(2010)]{GreeneHensher2010}
Greene W.H.,  Hensher D.A. (2010). \emph{Modeling Ordered Choices: A Primer}. Cambridge University Press.

\bibitem[Grilli \textit{et al.}(2014)]{Grilli14}
Grilli L., Iannario M., Piccolo D., Rampichini C. (2014). Latent Class \cub\, Models. \emph{Advances in Data Analysis and Classification}, \textbf{8}(1), 105--119.

\bibitem[Harrell(2009)]{Harrell09}
Harrell F.E. Jr (2009). rms: Regression Modeling Strategies. R package version 2.1-0. \url{http://CRAN.R-project.org/package=rms}.

\bibitem[Iannario(2008)]{Ian08}
Iannario M. (2008). Selecting feeling covariates in rating surveys. \emph{Rivista di Statistica Applicata}, \textbf{20}, 103--116.


\bibitem[Iannario(2010)]{Ian10}
Iannario M. (2010). On the identifiability of a mixture model for ordinal data. \emph{Metron}, \textbf{LXVIII}(1), 87--94.

\bibitem[Iannario(2012a)]{Iannario2012a}
Iannario M. (2012a). Modelling \emph{shelter} choices in a class of mixture models for ordinal responses. \emph{Statistical Methods and Applications}, \textbf{21}(1), 1--22.

\bibitem[Iannario(2012b)]{Ian12b}
Iannario M. (2012b). \cube\, models for interpreting ordered categorical data with overdispersion. \emph{Quaderni di Statistica}, \textbf{14}(1), 137--140.

\bibitem[Iannario(2012c)]{Ian12c}
Iannario M. (2012c). Preliminary estimators for a mixture model of ordinal data. \emph{Advances in Data Analysis and Classification}, \textbf{6}(3), 163--184.

\bibitem[Iannario(2012d)]{Ian12d}
Iannario M. (2012d). Hierarchical \cub\, models for ordinal variables. \emph{Communications in Statistics. Theory and Methods}, \textbf{41}(16-17), 3110--3125.

\bibitem[Iannario(2014)]{Iannario2014}
Iannario M. (2014). Modelling Uncertainty and Overdispersion in Ordinal Data.
\emph{Communications in Statistics. Theory and Methods}, \textbf{43}(4), 771--786.

\bibitem[Iannario and Piccolo(2014)]{StatAppli}
Iannario M., Piccolo D. (2014), Inference for \cub\, models: a program in R, \emph{Statistica \& Applicazioni}, \textbf{XII}, 177--204.

\bibitem[Iannario(2015)]{Iannario2015}
Iannario M. (2015). Detecting latent components in ordinal data with overdispersion by means of a mixture distribution. \emph{Quality \& Quantity}, \textbf{49}(3), 977--987.

\bibitem[Iannario(2016)]{Iannario2016}
Iannario M. (2016). Testing the overdispersion parameter in \cube\, models. \emph{Communications in Statistics: Simulation and Computation}, \textbf{45}(5), 1621--1635.

\bibitem[Iannario and Piccolo(2010a)]{Ianpicc10a}
Iannario M., Piccolo D. (2010a). A New Statistical Model for the Analysis of Customer Satisfaction. \emph{Quality Technology \& Quantitative Management}, \textbf{7}(2), 149--168.

\bibitem[Iannario and Piccolo(2010b)]{Ianpicc10b}
Iannario M., Piccolo D. (2010b). Statistical modelling of subjective survival probabilities. \emph{GENUS}, \textbf{LXVI}(2), 17--42.

\bibitem[Iannario and Piccolo(2016a)]{IannarioPiccolo2016a}
Iannario M., Piccolo D. (2016a). A comprehensive framework for regression models of ordinal Data. \emph{Metron}, \textbf{74}(2), 233--252.

\bibitem[Iannario and Piccolo(2016b)]{IannarioPiccolo2016b}
Iannario M., Piccolo D. (2016b). A generalized framework for modelling ordinal data. \emph{Statistical Methods and Applications}, \textbf{25}, 163--189.
% \bibitem[Iannario and Piccolo(2016b)]{IannarioPiccolo2015c}
% Iannario M., Piccolo D. (2016b). A comparative analysis of alternative frameworks for modelling ordinal data. \emph{Working paper}.

\bibitem[Iannario \textit{et al.}(2012)]{Iannarioetal12}
Iannario M., Manisera M., Piccolo D., Zuccolotto P. (2012). Sensory analysis in the food industry as a tool for marketing decisions. \emph{Advances in Data Analysis and Classification}, \textbf{6}(4),  303--321.

\bibitem[Iannario \textit{et al.}(2016a)]{Iannarioetal16}
Iannario M., Manisera M., Zuccolotto P. (2016). Treatment of ``don't know'' responses in the consumers' perceptions about sustainability in the agri-food sector. \emph{Quality and Quantity}, DOI:10.1007/s11135-016-0438-7.

\bibitem[Iannario  \textit{et al.}(2016b)]{Iannarioetal16a}
Iannario M., Monti A.C., Piccolo D. (2016). Robustness issues for \cub\, models. \emph{TEST}, \textbf{25}(4), 731--750.

\bibitem[Iannario  \textit{et al.}(2017)]{Iannarioetal16b}
Iannario M., Monti A.C., Piccolo D., Ronchetti E. (2017). Robust inference for ordinal response models. \emph{Electronic Journal of Statistics}, \textbf{11}(2),  3407--3445.

% \bibitem[Iannario \textit{et al.}(2016c)]{IannarioPiccoloSimone}
% Iannario M., Piccolo D., Simone R. (2016c). CUB: A Class of Mixture Models for Ordinal Data. R package version 0.1. \url{http://CRAN.R-project.org/package=CUB}.

\bibitem[Laakso and Taagepera(1989)]{Laakso89}
Laakso, M. and Taagepera, R. (1989). Effective number of parties: a measure with application to West Europe, \emph{Comparative Political Studies}, \textbf{12}, 3--27.

\bibitem[Manisera and Zuccolotto(2014a)]{ManiZucc2014a}
Manisera M., Zuccolotto P. (2014a). Modeling ``Don't know'' responses in rating scales. \emph{Pattern Recognition Letters}, \textbf{45}, 226--234.

\bibitem[Manisera and Zuccolotto(2014b)]{ManiZucc2014b}
Manisera M., Zuccolotto P. (2014b). Modeling rating data with Nonlinear \cub\, models. \emph{Computational Statistics and Data Analysis}, \textbf{78}, 100--118.

\bibitem[Martin \textit{et al.}(2009)]{Martin2009}
Martin A. D., Quinn K. M., and Park J. H. (2009). MCMCpack: Markov Chain Monte Carlo in R. \emph{Journal of Statistical Software}, \textbf{42}(9), 1--21.

\bibitem[McCullagh(1980)]{McCullagh1980}
McCullagh P. (1980). Regression models for ordinal data. \emph{Journal of the Royal Statistical Society, Series B}, \textbf{42}(2), 109--142.

\bibitem[McLachlan and Krishnan(1997)]{Mac1997}
McLachlan G.J.,  Krishnan T. (1997). \emph{The EM Algorithm and Extensions}. John Wiley \& Sons, New York.

\bibitem[Piccolo(2003)]{Piccolo2003}
Piccolo D. (2003). On the moments of a mixture of uniform and shifted binomial random variables. \emph{Quaderni di Statistica}, \textbf{5}(1), 85--104.

\bibitem[Piccolo(2006)]{Piccolo2006}
Piccolo D. (2006). Observed information matrix for MUB models. \emph{Quaderni di Statistica}, \textbf{8}(1), 33--78.

\bibitem[Piccolo(2015)]{Piccolo2015}
Piccolo D. (2015). Inferential issues for \cube\, models with covariates. \emph{Communications in Statistics. Theory and Methods}, \textbf{44}(23), 771--786.

\bibitem[Piccolo and D'Elia(2008)]{Picd08}
Piccolo D., D'Elia A. (2008). A new approach for modelling consumers' preferences. \emph{Food Quality and  Preference}, \textbf{19}(3), 247--259.

\bibitem[Ripley and Venables(2016)]{Ripley2016}
Ripley B.D., Venables, W.N. (2016). Package `nnet'. \url{http://CRAN.R-project.org/package=NNET}.

\bibitem[Schwarz(1978)]{Schwarz1978}
Schwarz G. (1978). Estimating the Dimension of a Model. \emph{The Annals of Statistics}, \textbf{6}(2), 461--464.

\bibitem[Tutz(2012)]{Tutz2012}
Tutz G. (2012). \emph{Regression for Categorical Data}. Cambridge University Press, Cambridge.

\bibitem[Tutz \textit{et al.}(2016)]{Tutz2016}
Tutz G., Schneider M., Iannario M. and Piccolo D. (2016). Mixture models for ordinal responses to account for uncertainty of choice. \emph{Advances in Data Analysis and Classification}, \textbf{11}(2), 281--305.

\bibitem[Venables and Ripley(2002)]{Ripley2002}
Venables W.N., Ripley B.D. (2002). \emph{Modern Applied Statistics with S}. Fourth edition. Springer.

\bibitem[Yee(2010)]{Yee2010}
Yee T.W. (2010). The VGAM Package for Categorical Data Analysis. \emph{Journal of Statistical Software}, \textbf{32}(10), 1--34.

\bibitem[Zeileis and Croissant(2010)]{Formula}
Zeileis A., Croissant Y. (2010). Extended Model Formulas in R: Multiple Parts and Multiple Responses. \emph{Journal of Statistical Software}, \textbf{34}(1), 1--13.


\end{thebibliography}



\end{document}