## ----setup, include = FALSE---------------------------------------------------
library(survPen)
library(splines) # for ns

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8, 
  fig.height = 4.5
)

## ----echo=TRUE, results='asis'------------------------------------------------
data(datCancer)
knitr::kable(head(datCancer,10))

## ----fig.show='hold'----------------------------------------------------------
f.cst <- ~1
mod.cst <- survPen(f.cst,data=datCancer,t1=fu,event=dead)

## ----fig.show='hold'----------------------------------------------------------
f.pwcst <- ~pwcst(breaks=seq(0,5,by=0.5))
mod.pwcst <- survPen(f.pwcst,data=datCancer,t1=fu,event=dead)

## ----fig.show='hold'----------------------------------------------------------
f.lin <- ~fu
mod.lin <- survPen(f.lin,data=datCancer,t1=fu,event=dead)

## ----fig.show='hold'----------------------------------------------------------
library(splines)

f.rcs <- ~ns(fu,knots=c(0.25, 0.5, 1, 2, 4),Boundary.knots=c(0,5))

mod.rcs <- survPen(f.rcs,data=datCancer,t1=fu,event=dead)

## ----fig.show='hold'----------------------------------------------------------
f.pen <- ~ smf(fu,knots=c(0,0.25, 0.5, 1, 2, 4,5)) # careful here: the boundary knots are included

mod.pen <- survPen(f.pen,data=datCancer,t1=fu,event=dead)

## ----fig.show='hold'----------------------------------------------------------
mod.unpen <- survPen(f.pen,data=datCancer,t1=fu,event=dead,lambda=0)

## ----fig.show='hold'----------------------------------------------------------
new.time <- seq(0,5,length=100)
pred.cst <- predict(mod.cst,data.frame(fu=new.time))
pred.pwcst <- predict(mod.pwcst,data.frame(fu=new.time))
pred.lin <- predict(mod.lin,data.frame(fu=new.time))
pred.rcs <- predict(mod.rcs,data.frame(fu=new.time))
pred.pen <- predict(mod.pen,data.frame(fu=new.time))


lwd1 <- 2

par(mfrow=c(1,1))
plot(new.time,pred.cst$haz,type="l",ylim=c(0,0.2),main="hazard vs time",
xlab="time since diagnosis (years)",ylab="hazard",col="black",lwd=lwd1)
segments(x0=new.time[1:99],x1=new.time[2:100],y0=pred.pwcst$haz[1:99],col="blue3",lwd=lwd1)
lines(new.time,pred.lin$haz,col="green3",lwd=lwd1)
lines(new.time,pred.rcs$haz,col="orange",lwd=lwd1)
lines(new.time,pred.pen$haz,col="red",lwd=lwd1)

legend("topright",
legend=c("constant","piecewise constant","log-linear","cubic spline","penalized cubic spline"),
col=c("black","blue3","green3","orange","red"),
lty=rep(1,5),lwd=rep(lwd1,5))


## ----fig.show='hold'----------------------------------------------------------

par(mfrow=c(1,2))
plot(new.time,pred.pen$haz,type="l",ylim=c(0,0.2),main="Hazard from mod.pen with CIs",
xlab="time since diagnosis (years)",ylab="hazard",col="red",lwd=lwd1)
lines(new.time,pred.pen$haz.inf,lty=2)
lines(new.time,pred.pen$haz.sup,lty=2)

plot(new.time,pred.pen$surv,type="l",ylim=c(0,1),main="Survival from mod.pen with CIs",
xlab="time since diagnosis (years)",ylab="survival",col="red",lwd=lwd1)
lines(new.time,pred.pen$surv.inf,lty=2)
lines(new.time,pred.pen$surv.sup,lty=2)

## ----fig.show='hold'----------------------------------------------------------
f.pen.age <- ~tensor(fu,age,df=c(5,5)) # see below for explanations about tensor models
mod.pen.age <- survPen(f.pen.age,data=datCancer,t1=fu,event=dead)
pred.pen.HR <- predict(mod.pen.age,data.frame(fu=new.time,age=70),newdata.ref=data.frame(fu=new.time,age=30),type="HR")

par(mfrow=c(1,2))
plot(new.time,pred.pen.HR$HR,type="l",ylim=c(0,15),main="Hazard ratio (age 70 vs 30)",
xlab="time since diagnosis (years)",ylab="hazard ratio",col="red",lwd=lwd1)
lines(new.time,pred.pen.HR$HR.inf,lty=2)
lines(new.time,pred.pen.HR$HR.sup,lty=2)

plot(new.time,pred.pen.HR$diff.surv,type="l",
ylim=range(c(pred.pen.HR$diff.surv.inf,pred.pen.HR$diff.surv.sup)),
main="Survival difference (age 70 vs 30)",xlab="time since diagnosis (years)",
ylab="survival difference",col="red",lwd=lwd1)
lines(new.time,pred.pen.HR$diff.surv.inf,lty=2)
lines(new.time,pred.pen.HR$diff.surv.sup,lty=2)


## ----fig.show='hold'----------------------------------------------------------
# you can also calculate the hazard yourself with the lpmatrix option.
# For example, compare the following predictions:
haz.pen <- pred.pen$haz

X.pen <- predict(mod.pen,data.frame(fu=new.time),type="lpmatrix")
haz.pen.lpmatrix <- as.numeric(exp(X.pen%*%mod.pen$coefficients))

summary(haz.pen.lpmatrix - haz.pen)

## ----fig.show='hold'----------------------------------------------------------
# standard errors from the Bayesian covariance matrix Vp
std <- sqrt(rowSums((X.pen%*%mod.pen$Vp)*X.pen))

qt.norm <- stats::qnorm(1-(1-0.95)/2)
haz.inf <- as.vector(exp(X.pen%*%mod.pen$coefficients-qt.norm*std))
haz.sup <- as.vector(exp(X.pen%*%mod.pen$coefficients+qt.norm*std))

# checking that they are similar to the ones given by the predict function
summary(haz.inf - pred.pen$haz.inf)
summary(haz.sup - pred.pen$haz.sup)


## ----echo=TRUE----------------------------------------------------------------
summary(mod.pen)

## ----fig.show='hold'----------------------------------------------------------
mod.pen$ll.unpen
mod.pen$ll.pen
mod.pen$p
sum(mod.pen$edf)
mod.pen$LAML
mod.pen$lambda
summary(mod.pen)$edf.per.smooth

## ----fig.show='hold'----------------------------------------------------------
mod.pen$aic

## ----fig.show='hold'----------------------------------------------------------
mod.pen$edf

## ----fig.show='hold'----------------------------------------------------------
mod.pen$aic2

## ----fig.show='hold'----------------------------------------------------------
mod.pen$edf2

## ----fig.show='hold'----------------------------------------------------------
f1 <- ~smf(fu)

mod.LCV <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=NULL,method="LCV")
mod.LCV$lambda

mod.LAML <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=NULL,method="LAML")
mod.LAML$lambda

new.time <- seq(0,5,length=100)
pred.LCV <- predict(mod.LCV,data.frame(fu=new.time))
pred.LAML <- predict(mod.LAML,data.frame(fu=new.time))


par(mfrow=c(1,1))
plot(new.time,pred.LCV$haz,type="l",ylim=c(0,0.2),main="LCV vs LAML",
xlab="time since diagnosis (years)",ylab="hazard",col="black",lwd=lwd1)
lines(new.time,pred.LAML$haz,col="red",lwd=lwd1,lty=2)
legend("topright",legend=c("LCV","LAML"),col=c("black","red"),lty=c(1,2),lwd=rep(lwd1,2))


## ----fig.show='hold'----------------------------------------------------------
rho.vec <- seq(-1,15,length=50)
LCV <- rep(0,50)
LAML <- rep(0,50)

for (i in 1:50){
	mod <- survPen(f1,data=datCancer,t1=fu,event=dead,lambda=exp(rho.vec[i]))
	LCV[i] <- mod$LCV
	LAML[i] <- mod$LAML
}	
	
	
par(mfrow=c(1,2),mar=c(3,3,1.5,0.5),mgp=c(1.5,0.5,0))
plot(rho.vec,LCV,type="l",main="LCV vs log(lambda)",ylab="LCV",xlab="log(lambda)",lwd=lwd1)	
	
plot(rho.vec,LAML,type="l",main="LAML vs log(lambda)",ylab="-LAML",xlab="log(lambda)",lwd=lwd1)	

## ----fig.show='hold'----------------------------------------------------------
f1 <- ~smf(fu,df=5)

## ----fig.show='hold'----------------------------------------------------------
df1 <- 5
quantile(unique(datCancer$fu),seq(0,1,length=df1)) 


## ----fig.show='hold'----------------------------------------------------------
mod1 <- survPen(f1,data=datCancer,t1=fu,event=dead)

mod1$list.smf

## ----fig.show='hold'----------------------------------------------------------
# f1 <- ~smf(fu,knots=c(0,1,3,6,8))

## ----fig.show='hold'----------------------------------------------------------
mod.total <- survPen(f1,data=datCancer,t1=fu,event=dead,method="LAML")

mod.excess <- survPen(f1,data=datCancer,t1=fu,event=dead,expected=rate,method="LAML")

# compare the predictions of the models
new.time <- seq(0,5,length=100)
pred.total <- predict(mod.total,data.frame(fu=new.time))
pred.excess <- predict(mod.excess,data.frame(fu=new.time))

# hazard vs excess hazard
par(mfrow=c(1,2))
plot(new.time,pred.total$haz,type="l",ylim=c(0,0.2),main="hazard vs excess hazard",
xlab="time since diagnosis (years)",ylab="hazard",lwd=lwd1)
lines(new.time,pred.excess$haz,col="red",lwd=lwd1,lty=2)
legend("topright",legend=c("total","excess"),col=c("black","red"),lty=c(1,2), lwd=rep(lwd1,2))

plot(new.time,pred.total$surv,type="l",ylim=c(0,1),main="survival vs net survival",
xlab="time",ylab="survival",lwd=lwd1)
lines(new.time,pred.excess$surv,col="red",lwd=lwd1,lty=2)
legend("bottomleft",legend=c("overall survival","net survival"), col=c("black","red"), lty=c(1,2), lwd=rep(lwd1,2)) 



## ----fig.show='hold'----------------------------------------------------------
f.tensor <- ~tensor(fu,age,df=c(5,5))

f.tint <- ~tint(fu,df=5)+tint(age,df=5)+tint(fu,age,df=c(5,5))

# hazard model
mod.tensor <- survPen(f.tensor,data=datCancer,t1=fu,event=dead)
summary(mod.tensor)

mod.tint <- survPen(f.tint,data=datCancer,t1=fu,event=dead)
summary(mod.tint)

# predictions
new.age <- seq(50,90,length=50)
new.time <- seq(0,7,length=50)

Z.tensor <- outer(new.time,new.age,function(t,a) predict(mod.tensor,data.frame(fu=t,age=a))$haz)
Z.tint <- outer(new.time,new.age,function(t,a) predict(mod.tint,data.frame(fu=t,age=a))$haz)

# color settings
col.pal <- colorRampPalette(c("white", "red"))
colors <- col.pal(100)

facet <- function(z){

	facet.center <- (z[-1, -1] + z[-1, -ncol(z)] + z[-nrow(z), -1] + z[-nrow(z), -ncol(z)])/4
	cut(facet.center, 100)
	
}


theta1 = 30
zmax=1.1

# plot the hazard surfaces for both models
par(mfrow=c(1,2),mar=c(3,3,1.5,0.5),mgp=c(1.5,0.5,0))
persp(new.time,new.age,Z.tensor,col=colors[facet(Z.tensor)],main="tensor",theta=theta1,
xlab="\n time since diagnosis",ylab="\n age",zlab="\n excess hazard",
ticktype="detailed",zlim=c(0,zmax))
persp(new.time,new.age,Z.tint,col=colors[facet(Z.tint)],main="tint",theta=theta1,
xlab="\n time since diagnosis",ylab="\n age",zlab="\n excess hazard",
ticktype="detailed",zlim=c(0,zmax))


## ----fig.show='hold'----------------------------------------------------------
set.seed(18)
subdata <- datCancer[sample(1:2000,50),]

## ----fig.show='hold'----------------------------------------------------------
mod.tensor.sub <- survPen(f.tensor,data=subdata,t1=fu,event=dead)

mod.tint.sub <- survPen(f.tint,data=subdata,t1=fu,event=dead)

## ----fig.show='hold'----------------------------------------------------------
# tensor
mod.tensor.sub$lambda
summary(mod.tensor.sub)$edf.per.smooth

# tint
mod.tint.sub$lambda
summary(mod.tint.sub)$edf.per.smooth

## ----fig.show='hold'----------------------------------------------------------
new.age <- seq(quantile(subdata$age,0.10),quantile(subdata$age,0.90),length=50)
new.time <- seq(0,max(subdata$fu),length=50)

Z.tensor.sub <- outer(new.time,new.age,function(t,a) predict(mod.tensor.sub,data.frame(fu=t,age=a))$haz)
Z.tint.sub <- outer(new.time,new.age,function(t,a) predict(mod.tint.sub,data.frame(fu=t,age=a))$haz)

theta1 = 30
zmax=0.7

# plot the hazard surfaces for both models
par(mfrow=c(1,2),mar=c(3,3,1.5,0.5),mgp=c(1.5,0.5,0))
persp(new.time,new.age,Z.tensor.sub,col=colors[facet(Z.tensor.sub)],main="tensor",theta=theta1,
xlab="\n time since diagnosis",ylab="\n age",zlab="\n excess hazard",
ticktype="detailed",zlim=c(0,zmax))
persp(new.time,new.age,Z.tint.sub,col=colors[facet(Z.tint.sub)],main="tint",theta=theta1,
xlab="\n time since diagnosis",ylab="\n age",zlab="\n excess hazard",
ticktype="detailed",zlim=c(0,zmax))

## ----fig.show='hold'----------------------------------------------------------
data2D <- expand.grid(fu=new.time,age=c(50,60,70,80))

data2D$haz.tensor <- predict(mod.tensor.sub,data2D)$haz
data2D$haz.tint <- predict(mod.tint.sub,data2D)$haz


par(mfrow=c(2,2),mar=c(3,3,1.5,0.5),mgp=c(1.5,0.5,0))

plot(new.time,data2D[data2D$age==50,]$haz.tensor,type="l",ylim=c(0,0.7),
main="age 50",xlab="time since diagnosis",ylab="excess hazard",lwd=lwd1)
lines(new.time,data2D[data2D$age==50,]$haz.tint,col="red",lty=2,lwd=lwd1)
legend("topright",c("tensor","tint"),lty=c(1,2),col=c("black","red"),lwd=rep(lwd1,2))

for (i in c(60,70,80)){
plot(new.time,data2D[data2D$age==i,]$haz.tensor,type="l",ylim=c(0,0.7),
main=paste("age", i),xlab="time since diagnosis",ylab="excess hazard",lwd=lwd1)
lines(new.time,data2D[data2D$age==i,]$haz.tint,col="red",lty=2,lwd=lwd1)
}

## ----fig.show='hold'----------------------------------------------------------
mod.tensor.sub$aic2

mod.tint.sub$aic2

## ----fig.show='hold'----------------------------------------------------------
f4 <- ~tensor(fu,age,yod,df=c(5,5,5))

# excess hazard model
mod6 <- survPen(f4,data=datCancer,t1=fu,event=dead,expected=rate)
summary(mod6)


# predictions of surfaces for years 1990, 1997, 2003 and 2010
new.age <- seq(50,90,length=50)
new.time <- seq(0,5,length=50)

Z_1990 <- outer(new.time,new.age,function(t,a) predict(mod6,data.frame(fu=t,yod=1990,age=a))$haz)
Z_1997 <- outer(new.time,new.age,function(t,a) predict(mod6,data.frame(fu=t,yod=1997,age=a))$haz)
Z_2003 <- outer(new.time,new.age,function(t,a) predict(mod6,data.frame(fu=t,yod=2003,age=a))$haz)
Z_2010 <- outer(new.time,new.age,function(t,a) predict(mod6,data.frame(fu=t,yod=2010,age=a))$haz)


par(mfrow=c(1,2),mar=c(3,3,1.5,0.5),mgp=c(1.5,0.5,0))
persp(new.time,new.age,Z_1990,col=colors[facet(Z_1990)],main="1990",theta=20,
xlab="\n time since diagnosis",ylab="\n age",zlab="\n excess hazard",
ticktype="detailed",zlim=c(0,1))
persp(new.time,new.age,Z_1997,col=colors[facet(Z_1997)],main="1997",theta=20,
xlab="\n time since diagnosis",ylab="\n age",zlab="\n excess hazard",
ticktype="detailed",zlim=c(0,1))

par(mfrow=c(1,2),mar=c(3,3,1.5,0.5),mgp=c(1.5,0.5,0))
persp(new.time,new.age,Z_2003,col=colors[facet(Z_2003)],main="2003",theta=20,
xlab="\n time since diagnosis",ylab="\n age",zlab="\n excess hazard",
ticktype="detailed",zlim=c(0,1))
persp(new.time,new.age,Z_2010,col=colors[facet(Z_2010)],main="2010",theta=20,
xlab="\n time since diagnosis",ylab="\n age",zlab="\n excess hazard",
ticktype="detailed",zlim=c(0,1))

## ----fig.show='hold'----------------------------------------------------------
n <- 10000
don <- data.frame(num=1:n)

shape_men <- 0.90 # shape for men (first weibull parameter)
shape_women <- 0.90 # shape for women

scale_men <- 0.6 # second weibull parameter
scale_women <- 0.7

prop_men <- 0.5 # proportion of men

set.seed(50)
don$sex <- factor(sample(c("men","women"),n,replace=TRUE,prob=c(prop_men,1-prop_men)))
don$sex.order <- factor(don$sex,levels=c("women","men"),ordered=TRUE)

don$shape <- ifelse(don$sex=="men",shape_men,shape_women)
don$scale <- ifelse(don$sex=="men",scale_men,scale_women)

don$fu <- rweibull(n,shape=don$shape,scale=don$scale)
don$dead <- 1 # no censoring

## ----fig.show='hold'----------------------------------------------------------
hazard <- function(x,shape,scale){
exp(dweibull(x,shape=shape,scale=scale,log=TRUE) - pweibull(x,shape=shape,scale=scale,log.p=TRUE,lower.tail=FALSE))
}

nt <- seq(0.01,5,by=0.1)

mar1 <- c(3,3,1.5,0.5)
mgp1 <- c(1.5,0.5,0)

par(mfrow=c(1,2),mar=mar1,mgp=mgp1)
plot(nt,hazard(nt,shape_women,scale_women),type="l",
xlab="time",ylab="hazard",lwd=lwd1,main="Theoretical hazards",
ylim=c(0,max(hazard(nt,shape_women,scale_women),hazard(nt,shape_men,scale_men))))
lines(nt,hazard(nt,shape_men,scale_men),col="red",lwd=lwd1,lty=2)
legend("bottomleft",c("women","men"),lty=c(1,2),lwd=rep(lwd1,2),col=c("black","red"))

plot(nt,hazard(nt,shape_men,scale_men)/hazard(nt,shape_women,scale_women),type="l",
xlab="time",ylab="hazard ratio",lwd=lwd1,
ylim=c(0,2),
main="Theoretical HR men / women")

## ----fig.show='hold'----------------------------------------------------------
# knots for time
knots.t <- quantile(don$fu,seq(0,1,length=10))

# stratified analysis via the two models
m.men <- survPen(~smf(fu,knots=knots.t),t1=fu,event=dead,data=don[don$sex=="men",])
m.women <- survPen(~smf(fu,knots=knots.t),t1=fu,event=dead,data=don[don$sex=="women",])

# by variable with same.rho = FALSE
m.FALSE <- survPen(~sex + smf(fu,by=sex,same.rho=FALSE,knots=knots.t),t1=fu,event=dead,data=don)

# by variable with same.rho = TRUE
m.TRUE <- survPen(~sex + smf(fu,by=sex,same.rho=TRUE,knots=knots.t),t1=fu,event=dead,data=don)

# difference smooth via ordered factor by variable
m.difference <- survPen(~sex.order + smf(fu,knots=knots.t) +smf(fu,by=sex.order,same.rho=FALSE,knots=knots.t),t1=fu,event=dead,data=don)

## ----fig.show='hold'----------------------------------------------------------
newt <- seq(0,5,by=0.1)
data.pred <- expand.grid(fu=newt,sex=c("women","men"))
data.pred$men <- ifelse(data.pred$sex=="men",1,0)
data.pred$women <- ifelse(data.pred$sex=="women",1,0)
data.pred$sex.order <- data.pred$sex # no need to reorder here as the model keeps track of the factor's structure

data.pred$haz.men <- predict(m.men,data.pred)$haz
data.pred$haz.women <- predict(m.women,data.pred)$haz
data.pred$haz.FALSE <- predict(m.FALSE,data.pred)$haz
data.pred$haz.TRUE <- predict(m.TRUE,data.pred)$haz
data.pred$haz.difference <- predict(m.difference,data.pred)$haz

# predicting hazard
ylim1 <- c(0,max(data.pred$haz.men,data.pred$haz.women,
data.pred$haz.FALSE,data.pred$haz.TRUE,data.pred$haz.difference))

par(mfrow=c(1,2),mar=mar1,mgp=mgp1)
plot(newt,data.pred[data.pred$sex=="men",]$haz.men,type="l",main="Men",lwd=lwd1,
ylim=ylim1,xlab="time since diagnosis",ylab="hazard")
lines(newt,data.pred[data.pred$sex=="men",]$haz.FALSE,col="red",lwd=lwd1,lty=2)
lines(newt,data.pred[data.pred$sex=="men",]$haz.TRUE,col="green3",lwd=lwd1,lty=4)
lines(newt,data.pred[data.pred$sex=="men",]$haz.difference,col="orange",lwd=lwd1,lty=5)
lines(nt,hazard(nt,shape_men,scale_men),col="blue3",lty=3)
legend("bottomleft",c("stratified","same.rho=FALSE","same.rho=TRUE","difference smooth","true"),lty=c(1,2,4,5,3),
col=c("black","red","green3","orange","blue3"),lwd=c(rep(lwd1,4),1))

plot(newt,data.pred[data.pred$sex=="women",]$haz.women,type="l",main="Women",lwd=lwd1,
ylim=ylim1,xlab="time since diagnosis",ylab="hazard")
lines(newt,data.pred[data.pred$sex=="women",]$haz.FALSE,col="red",lwd=lwd1,lty=2)
lines(newt,data.pred[data.pred$sex=="women",]$haz.TRUE,col="green3",lwd=lwd1,lty=4)
lines(newt,data.pred[data.pred$sex=="women",]$haz.difference,col="orange",lwd=lwd1,lty=5)
lines(nt,hazard(nt,shape_women,scale_women),col="blue3",lty=3)

## ----fig.show='hold'----------------------------------------------------------
# predicting hazard ratio men / women

HR.stratified <- data.pred[data.pred$sex=="men",]$haz.men / data.pred[data.pred$sex=="women",]$haz.women
HR.FALSE <- data.pred[data.pred$sex=="men",]$haz.FALSE / data.pred[data.pred$sex=="women",]$haz.FALSE
HR.TRUE <- data.pred[data.pred$sex=="men",]$haz.TRUE / data.pred[data.pred$sex=="women",]$haz.TRUE
HR.difference <- data.pred[data.pred$sex=="men",]$haz.difference / data.pred[data.pred$sex=="women",]$haz.difference

par(mfrow=c(1,1))
plot(newt,HR.stratified,type="l",main="Hazard ratio, Men/Women",lwd=lwd1,
ylim=c(0,2),xlab="time since diagnosis",ylab="hazard ratio")
lines(newt,HR.FALSE,col="red",lwd=lwd1,lty=2)
lines(newt,HR.TRUE,col="green3",lwd=lwd1,lty=4)
lines(newt,HR.difference,col="orange",lwd=lwd1,lty=5)
abline(h=hazard(nt,shape_men,scale_men)/hazard(nt,shape_women,scale_women),lty=3,col="blue3")
legend("bottomright",c("stratified","same.rho=FALSE","same.rho=TRUE","difference smooth","true"),lty=c(1,2,4,5,3),
col=c("black","red","green3","orange","blue3"),lwd=c(rep(lwd1,4),1))

## ----fig.show='hold'----------------------------------------------------------
datCancer$agec <- datCancer$age - 50

## ----fig.show='hold'----------------------------------------------------------
m <- survPen(~smf(fu) + smf(fu,by=agec),data=datCancer,t1=fu,event=dead)
m$ll.pen

## ----fig.show='hold'----------------------------------------------------------
m.bis <- survPen(~smf(fu) + agec + tint(fu,by=agec,df=10),data=datCancer,t1=fu,event=dead)
m.bis$ll.pen # same penalized log-likelihood as m

## ----fig.show='hold'----------------------------------------------------------
m2 <- survPen(~tint(fu,df=10) + tint(agec,df=10) + tint(fu,by=agec,df=10),data=datCancer,t1=fu,event=dead)
m2$ll.pen 

## ----fig.show='hold'----------------------------------------------------------
  set.seed(1)
  
  # Weibull parameters
  shape <- 0.9
  scale <- 2
  
  # number of simulated datasets
  NFile <- 50
  
  # number of individuals per dataset
  n <- 2000
  
  # number of clusters
  NCluster <- 20
 
  # data frame
  data.rd <- data.frame(cluster=seq(1:NCluster))
  cluster <- sample(rep(1:NCluster,each=n/NCluster))
  don <- data.frame(num=1:n, cluster=factor(cluster)) # be careful, cluster needs to be a factor !
  don  <- merge(don, data.rd, by="cluster")[, union(names(don), names(data.rd))]
  don <- don[order(don$num),]
  rownames(don) <- NULL
  
  # theoretical standard deviation
  sd1 <- 0.1
  
  # vector of estimated log standard deviations
  log.sd.vec <- rep(as.numeric(NA),NFile)
  
  # maximum follow-up time
  max.time <- 5
  

## ----fig.show='hold'----------------------------------------------------------
  for (file in 1:NFile){
  
	  wj <- rnorm(NCluster,mean=0,sd=sd1) 
	  
	  don$wj <- wj[don$cluster]
	  
	  # simulated times
	  u <- runif(n)
	  don$fu <- exp( 1/shape*(log(-log(1-u)) - don$wj) + log(scale))
	  
	  # censoring
	  don$dead <- ifelse(don$fu <= max.time,1,0)
	  don$fu <- pmin(don$fu,max.time)
	  
	  # fitting
	  mod.frailty <- survPen(~smf(fu)+rd(cluster),data=don,t1=fu,event=dead)
	  
	  # estimated log standard deviation
	  log.sd.vec[file] <- summary(mod.frailty)$random.effects[,"Estimate"]
	  
   }

# Relative Bias in percentage for sd1
100*(mean(exp(log.sd.vec)) - sd1)/sd1

## ----fig.show='hold'----------------------------------------------------------
summary(mod.frailty)

## ----fig.show='hold'----------------------------------------------------------
exp(summary(mod.frailty)$random.effects)[1]

## ----fig.show='hold'----------------------------------------------------------
exp(-0.5*log(mod.frailty$lambda)-0.5*log(mod.frailty$S.scale))[2]

## ----fig.show='hold'----------------------------------------------------------
# 1-year survival for a patient in cluster 6
predict(mod.frailty,data.frame(fu=1,cluster=6))$surv

# 1-year survival for a patient in cluster 10
predict(mod.frailty,data.frame(fu=1,cluster=10))$surv

## ----fig.show='hold'----------------------------------------------------------
# 1-year survival for a patient when random effect is set to zero 
predict(mod.frailty,data.frame(fu=1,cluster=10),exclude.random=TRUE)$surv

## ----fig.show='hold'----------------------------------------------------------
# fitting
f1 <- ~smf(fu)

mod.trunc <- survPen(f1,data=datCancer,t0=begin,t1=fu,event=dead,expected=NULL,method="LAML")

# predictions
new.time <- seq(0,5,length=100)

pred.trunc <- predict(mod.trunc,data.frame(fu=new.time))

par(mfrow=c(1,2))
plot(new.time,pred.trunc$haz,type="l",ylim=c(0,0.2),main="Hazard",
xlab="time since diagnosis (years)",ylab="hazard",lwd=lwd1)

plot(new.time,pred.trunc$surv,type="l",ylim=c(0,1),main="Survival",
xlab="time since diagnosis (years)",ylab="survival",lwd=lwd1)


## ----fig.show='hold'----------------------------------------------------------

library(survPen)
data(datCancer)
data(expected.table)

#-------------------- creating split dataset for multiplicative model

splitdat <- splitmult(datCancer, cut = (1:5), end = "fu", 
event = "dead")
			
#-------------------- merging with expected mortality table

# deriving current age and year (closest whole number)
splitdat$age_current <- floor(splitdat$age + splitdat$fu + 0.5)

splitdat$year_current <- floor(splitdat$yod + splitdat$fu + 0.5)

splitdat <- merge(splitdat, expected.table, 
                 by.x=c("age_current","year_current"), by.y=c("Age","Year"),all.x=T)


# fitting model

f1 <- ~smf(fu)

# In terms of Gauss-Legendre quadrature, as the follow-up is split into up to 5 parts, the cumulative hazard approximation
# requires less nodes than an approximation on the whole range of definition. If not supplied, the default number of nodes
# for a relative mortality ratio model will be 10 (as opposed to 20 for an excess hazard model) 
mod.ratio <- survPen(f1,data=splitdat,t1=fu,event=dead,expected=mx,method="LAML",type="mult")

# predictions of the model
new.time <- seq(0,5,length=100)
pred.ratio <- predict(mod.ratio,data.frame(fu=new.time))

lwd1 <- 2

par(mfrow=c(1,1))
plot(new.time,pred.ratio$ratio,type="l",ylim=range(c(pred.ratio$ratio.inf,pred.ratio$ratio.sup)),
main="realtive mortality ratio with CIs",
xlab="time since diagnosis (years)",ylab="relative mortality ratio",lwd=lwd1)
lines(new.time,pred.ratio$ratio.inf,lty=2)
lines(new.time,pred.ratio$ratio.sup,lty=2)


## ----fig.show='hold'----------------------------------------------------------

library(survPen)
data(HeartFailure)

# We are going to fit an unpenalized spline on the log-marginal intensity in order to use the robust variance with 
# a proper theoretical background. Indeed, with penalized splines it is advised to use
# bootstrap for confidence intervals (Coz et al. submitted to Biostatistics).

# Number of knots for an unpenalized natural cubic spline
df.t <- 4
k.t <-  c(quantile(HeartFailure$t1[HeartFailure[,"event"] == 1],seq(0,1,le=df.t)))

# We consider a non-proportional effect of treatment
mod_MI <- survPen(~ treatment + smf(t1, knots = k.t, by = treatment), 
                        t0 = t0, 
                        t1 = t1, data = HeartFailure, event = event, cluster=id, lambda=0)

# predictions
nt <- seq(0,3.27,le = 100)

pred.MI <- expand.grid(t1 = nt, treatment = c(0,1))

# marginal intensity
pred.MI$MI <- predict(mod_MI, newdata = pred.MI)$haz
pred.MI$MI.inf <- predict(mod_MI, newdata = pred.MI)$haz.inf
pred.MI$MI.sup <- predict(mod_MI, newdata = pred.MI)$haz.sup


# In the absence of a terminal event we can retrieve the expected number of cumulative events
# according to time
pred.MI$Expected_Nb_event <-  -log(predict(mod_MI, newdata = pred.MI)$surv)
pred.MI$Expected_Nb_event.inf <-  -log(predict(mod_MI, newdata = pred.MI)$surv.sup)
pred.MI$Expected_Nb_event.sup <-  -log(predict(mod_MI, newdata = pred.MI)$surv.inf)


lwd1 <- 2

vec.col <- c("red","forestgreen")

par(mfrow=c(1,2))
plot(nt,pred.MI$MI[pred.MI$treatment==0],type="l",
main="marginal intensity with CIs",ylim=c(0,1.5),
xlab="time (years)",ylab="marginal intensity",lwd=lwd1,col=vec.col[1])
lines(nt,pred.MI$MI.inf[pred.MI$treatment==0],lty=2,col=vec.col[1])
lines(nt,pred.MI$MI.sup[pred.MI$treatment==0],lty=2,col=vec.col[1])

lines(nt,pred.MI$MI[pred.MI$treatment==1],lwd=lwd1,lty=1,col=vec.col[2])
lines(nt,pred.MI$MI.inf[pred.MI$treatment==1],lty=2,col=vec.col[2])
lines(nt,pred.MI$MI.sup[pred.MI$treatment==1],lty=2,col=vec.col[2])

legend("bottomleft",c("exercise training","control"),col=vec.col,lty=1)

plot(nt,pred.MI$Expected_Nb_event[pred.MI$treatment==0],type="l",
main="Expected cumulative number of events",ylim=c(0,3),
xlab="time (years)",ylab="expected cumulative number of events",lwd=lwd1,col=vec.col[1])
lines(nt,pred.MI$Expected_Nb_event.inf[pred.MI$treatment==0],lty=2,col=vec.col[1])
lines(nt,pred.MI$Expected_Nb_event.sup[pred.MI$treatment==0],lty=2,col=vec.col[1])

lines(nt,pred.MI$Expected_Nb_event[pred.MI$treatment==1],lty=1,lwd=lwd1,col=vec.col[2])
lines(nt,pred.MI$Expected_Nb_event.inf[pred.MI$treatment==1],lty=2,col=vec.col[2])
lines(nt,pred.MI$Expected_Nb_event.sup[pred.MI$treatment==1],lty=2,col=vec.col[2])

# Comparing naïve and robust standard errors

naive <- sqrt(diag(mod_MI$Vp))

robust <- sqrt(diag(mod_MI$Vr))

print(naive)
print(robust)


## ----fig.show='hold'----------------------------------------------------------
f.pen <- ~ smf(fu) 

vec.lambda <- c(0,1000,10^6)
new.time <- seq(0,5,length=100)


par(mfrow=c(1,3),mar=c(3,3,1.5,0.5),mgp=c(1.5,0.5,0))

for (i in (1:3)){

	mod.pen <- survPen(f.pen,data=datCancer,t1=fu,event=dead,lambda=vec.lambda[i])
	pred.pen <- predict(mod.pen,data.frame(fu=new.time))

	plot(new.time,pred.pen$haz,type="l",ylim=c(0,0.2),main=paste0("hazard vs time, lambda = ",vec.lambda[i]),
	xlab="time since diagnosis (years)",ylab="hazard",col="black",lwd=lwd1)

}

## ----fig.show='hold'----------------------------------------------------------
mod.pen <- survPen(f.pen,data=datCancer,t1=fu,event=dead,rho.ini=5)

mod.excess.pen <- survPen(f.pen,data=datCancer,t1=fu,event=dead,expected=rate,rho.ini=5,beta.ini=mod.pen$coef)

## ----fig.show='hold'----------------------------------------------------------
mod.pen <- survPen(f.pen,data=datCancer,t1=fu,event=dead,detail.rho=TRUE)

## ----fig.show='hold'----------------------------------------------------------
mod.pen <- survPen(f.pen,data=datCancer,t1=fu,event=dead,detail.rho=TRUE,detail.beta=TRUE)