diff --git a/R/accuracy_measures.R b/R/accuracy_measures.R index 990c9c12..86048a7a 100644 --- a/R/accuracy_measures.R +++ b/R/accuracy_measures.R @@ -141,7 +141,7 @@ print.tvROC <- function (x, digits = 4, ...) { d <- d[!is.na(d$qSN) & !is.na(d$qSP), ] d <- d[!duplicated(d[c("SN", "SP")]), ] row.names(d) <- 1:nrow(d) - print(d) + print(d, digits = digits) cat("\n") invisible(x) } diff --git a/R/basic_methods.R b/R/basic_methods.R index d569a24b..3f96de91 100644 --- a/R/basic_methods.R +++ b/R/basic_methods.R @@ -595,19 +595,24 @@ crisk_setup <- function (data, statusVar, censLevel, nameStrata = "strata", dataOut } -predict.jm <- function (object, newdata = NULL, newdata2 = NULL, - times = NULL, all_times = FALSE, times_per_id = FALSE, +predict.jm <- function (object, newdata = NULL, newdata2 = NULL, times = NULL, process = c("longitudinal", "event"), type_pred = c("response", "link"), type = c("subject_specific", "mean_subject"), - level = 0.95, return_newdata = FALSE, use_Y = TRUE, - return_mcmc = FALSE, n_samples = 200L, n_mcmc = 55L, - parallel = c("snow", "multicore"), - cores = NULL, seed = 123L, ...) { + control = NULL, ...) { process <- match.arg(process) type_pred <- match.arg(type_pred) type <- match.arg(type) - parallel <- match.arg(parallel) + con <- list(all_times = FALSE, times_per_id = FALSE, level = 0.95, + return_newdata = FALSE, use_Y = TRUE, return_mcmc = FALSE, + n_samples = 200L, n_mcmc = 55L, + parallel = c("snow", "multicore"), cores = NULL, seed = 123L) + control <- c(control, list(...)) + namC <- names(con) + con[(namc <- names(control))] <- control + if (length(noNms <- namc[!namc %in% namC]) > 0) { + warning("unknown names in control: ", paste(noNms, collapse = ", ")) + } id_var <- object$model_info$var_names$idVar time_var <- object$model_info$var_names$time_var Time_var <- object$model_info$var_names$Time_var @@ -687,21 +692,23 @@ predict.jm <- function (object, newdata = NULL, newdata2 = NULL, check_varNames(object, newdata2, id_var, "E") check_varNames(object, newdata2, id_var, "L") } - if (is.null(cores)) { + if (is.null(con$cores)) { n <- if (!is.data.frame(newdata)) length(unique(newdata$newdataL[[id_var]])) else length(unique(newdata[[id_var]])) cores <- if (n > 20) 4L else 1L } components_newdata <- - get_components_newdata(object, newdata, n_samples, - n_mcmc, parallel, cores, seed, use_Y) + get_components_newdata(object, newdata, con$n_samples, + con$n_mcmc, con$parallel, con$cores, con$seed, + con$use_Y) if (process == "longitudinal") { predict_Long(object, components_newdata, newdata, newdata2, times, - all_times, times_per_id, type, type_pred, level, - return_newdata, return_mcmc) + con$all_times, con$times_per_id, type, type_pred, con$level, + con$return_newdata, con$return_mcmc) } else { predict_Event(object, components_newdata, newdata, newdata2, times, - times_per_id, level, return_newdata, return_mcmc) + con$times_per_id, con$level, con$return_newdata, + con$return_mcmc) } } @@ -1018,18 +1025,25 @@ rc_setup <- function(rc_data, trm_data, } predict.jmList <- function (object, weights, newdata = NULL, newdata2 = NULL, - times = NULL, all_times = FALSE, times_per_id = FALSE, - process = c("longitudinal", "event"), + times = NULL, process = c("longitudinal", "event"), type_pred = c("response", "link"), type = c("subject_specific", "mean_subject"), - level = 0.95, return_newdata = FALSE, - return_mcmc = FALSE, n_samples = 200L, n_mcmc = 55L, - parallel = c("snow", "multicore"), - cores = parallelly::availableCores(omit = 1L), ...) { + control = NULL, ...) { process <- match.arg(process) type_pred <- match.arg(type_pred) type <- match.arg(type) parallel <- match.arg(parallel) + con <- list(all_times = FALSE, times_per_id = FALSE, level = 0.95, + return_newdata = FALSE, use_Y = TRUE, return_mcmc = FALSE, + n_samples = 200L, n_mcmc = 55L, + parallel = c("snow", "multicore"), + cores = parallelly::availableCores(omit = 1L), seed = 123L) + control <- c(control, list(...)) + namC <- names(con) + con[(namc <- names(control))] <- control + if (length(noNms <- namc[!namc %in% namC]) > 0) { + warning("unknown names in control: ", paste(noNms, collapse = ", ")) + } obj <- object[[1L]] id_var <- obj$model_info$var_names$idVar time_var <- obj$model_info$var_names$time_var @@ -1153,7 +1167,7 @@ predict.jmList <- function (object, weights, newdata = NULL, newdata2 = NULL, "variable(s): ", paste(missing_vars, collapse = ", "), ".\n") } } - cores <- min(cores, length(object)) + cores <- min(con$cores, length(object)) if (cores > 1L) { have_mc <- have_snow <- FALSE if (parallel == "multicore") { @@ -1169,11 +1183,13 @@ predict.jmList <- function (object, weights, newdata = NULL, newdata2 = NULL, preds <- parallel::mclapply(object, predict, newdata = newdata, newdata2 = newdata2, times = times, - all_times = all_times, - times_per_id = times_per_id, + all_times = con$all_times, + times_per_id = con$times_per_id, process = process, type_pred = type_pred, - type = type, level = level, n_samples = n_samples, - n_mcmc = n_mcmc, return_newdata = return_newdata, + type = type, level = con$level, + n_samples = con$n_samples, + n_mcmc = con$n_mcmc, + return_newdata = con$return_newdata, return_mcmc = TRUE, mc.cores = cores) } else { cl <- parallel::makePSOCKcluster(rep("localhost", cores)) @@ -1181,11 +1197,13 @@ predict.jmList <- function (object, weights, newdata = NULL, newdata2 = NULL, preds <- parallel::parLapply(cl, object, predict, newdata = newdata, newdata2 = newdata2, times = times, - all_times = all_times, - times_per_id = times_per_id, + all_times = con$all_times, + times_per_id = con$times_per_id, process = process, type_pred = type_pred, - type = type, level = level, n_samples = n_samples, - n_mcmc = n_mcmc, return_newdata = return_newdata, + type = con$type, level = con$level, + n_samples = con$n_samples, + n_mcmc = con$n_mcmc, + return_newdata = con$return_newdata, return_mcmc = TRUE) parallel::stopCluster(cl) } @@ -1193,10 +1211,10 @@ predict.jmList <- function (object, weights, newdata = NULL, newdata2 = NULL, preds <- lapply(object, predict, newdata = newdata, newdata2 = newdata2, times = times, - all_times = all_times, times_per_id = times_per_id, + all_times = con$all_times, times_per_id = con$times_per_id, process = process, type_pred = type_pred, - type = type, level = level, n_samples = n_samples, - n_mcmc = n_mcmc, return_newdata = return_newdata, + type = type, level = con$level, n_samples = con$n_samples, + n_mcmc = con$n_mcmc, return_newdata = con$return_newdata, return_mcmc = TRUE) } extract_mcmc <- function (x) { diff --git a/docs/articles/Causal_Effects.html b/docs/articles/Causal_Effects.html index 6a7d427e..6e0d576d 100644 --- a/docs/articles/Causal_Effects.html +++ b/docs/articles/Causal_Effects.html @@ -135,7 +135,7 @@
vignettes/Causal_Effects.Rmd
Causal_Effects.Rmd
The coefficient for drugD-penicil
for the survival
outcome in the output produced by the summary()
method
denotes the residual/direct effect of treatment on the risk of the
@@ -259,12 +259,13 @@
# estimate
Pr1$pred[2L] - Pr0$pred[2L]
-#> [1] 0.002916962
-
+#> [1] 0.002916423
+
+
# MCMC variability
quantile(Pr1$mcmc[2L, ] - Pr0$mcmc[2L, ], probs = c(0.025, 0.975))
#> 2.5% 97.5%
-#> -0.1790865 0.1952537
vignettes/Competing_Risks.Rmd
Competing_Risks.Rmd
The first step to fit a joint model for competing events in +
The first step in fitting a joint model for competing events in
JMbayes2 is to prepare the data for the event process.
If there are \(K\) competing events,
-then each subject needs to have \(K\)
-rows, one for each possible cause. The observed event time \(T_i\) of each subject is repeated \(K\) times, and there are two indicator
-variables, namely one identifying the cause, and one indicating whether
+each subject must have \(K\) rows, one
+for each possible cause. The observed event time \(T_i\) of each subject is repeated \(K\) times, and there are two indicator
+variables, namely one identifying the cause and one indicating whether
the corresponding event type is the one that occurred. Standard survival
-datasets that included a single row per patient, can be easily
-transformed to the competing risks long format using function
+datasets that include a single row per patient can be easily transformed
+to the competing risks long format using the function
crisk_setup()
. This function accepts as main arguments the
-survival data in the standard format that has a single row per patient,
-the name of the status variable, and the level in this status variable
-that corresponds to censoring. We illustrate the use of this function in
-the PBC data, in which we treat as competing risks transplantation and
+survival data in the standard format with a single row per patient, the
+name of the status variable, and the level in this status variable that
+corresponds to censoring. We illustrate the use of this function in the
+PBC data, in which we treat as competing risks transplantation and
death:
pbc2.id[pbc2.id$id %in% c(1, 2, 5), c("id", "years", "status")]
#> id years status
#> 1 1 1.095170 dead
#> 2 2 14.152338 alive
-#> 5 5 4.120578 transplanted
-
+#> 5 5 4.120578 transplanted
+
pbc2.idCR <- crisk_setup(pbc2.id, statusVar = "status", censLevel = "alive",
nameStrata = "CR")
@@ -199,21 +200,21 @@ Fit models
+
CoxFit_CR <- coxph(Surv(years, status2) ~ (age + drug) * strata(CR),
data = pbc2.idCR)
-For the longitudinal process, we include two longitudinal outcomes,
-namely serum bilirubin and the prothrombin time. For the former we use
+
We include two longitudinal outcomes for the longitudinal process,
+namely serum bilirubin and the prothrombin time. For the former, we use
quadratic orthogonal polynomials in the fixed- and random-effects parts,
-and for the latter linear evolutions:
-
+and for the latter, linear evolutions:
+
fm1 <- lme(log(serBilir) ~ poly(year, 2) * drug, data = pbc2,
random = ~ poly(year, 2) | id)
fm2 <- lme(prothrombin ~ year * drug, data = pbc2, random = ~ year | id)
To specify that each longitudinal outcome has a separate association
coefficient per competing risk, we define the corresponding functional
forms:
-
+
CR_forms <- list(
"log(serBilir)" = ~ value(log(serBilir)):CR,
"prothrombin" = ~ value(prothrombin):CR
@@ -222,7 +223,7 @@ Fit modelsjm()
(due to the complexity of the model, we have
increased the number of MCMC iterations and the burn-in period per
chain):
-
+
jFit_CR <- jm(CoxFit_CR, list(fm1, fm2), time_var = "year",
functional_forms = CR_forms,
n_iter = 25000L, n_burnin = 5000L, n_thin = 5L)
@@ -296,35 +297,35 @@ Fit models#> iterations per chain: 25000
#> burn-in per chain: 5000
#> thinning: 5
-#> time: 6.4 min
+#> time: 6.5 min
Dynamic predictions
Based on the fitted competing risks joint model, we will illustrate
-how (dynamic) predictions for the cause-specific cumulative risk
-probabilities can be calculated. As an example, we will show these
-calculations for Patient 81 from the PBC dataset. First, we extract the
-data of this subject.
-
+how (dynamic) predictions can be calculated for the cause-specific
+cumulative risk probabilities. We will show these calculations for
+Patient 81 from the PBC dataset as an example. First, we extract the
+data on this subject.
+
ND_long <- pbc2[pbc2$id == 81, ]
ND_event <- pbc2.idCR[pbc2.idCR$id == 81, ]
ND_event$status2 <- 0
ND <- list(newdataL = ND_long, newdataE = ND_event)
The first line extracts the longitudinal measurements, and the second
line extracts the event times per cause (i.e., death and
-transplantation). This particular patient died at 6.95 years, but to
-make the calculation of cause-specific cumulative risk more relevant, we
-presume that she did not have the event, and we set the event status
-variable status2
to zero. The last line combines the two
-datasets in a list. Note: this last step is a prerequisite from
-the predict()
method for competing risks joint model. That
-is, the datasets provided in the arguments newdata
and
+transplantation). This patient died at 6.95 years, but to make the
+calculation of cause-specific cumulative risk more relevant, we presume
+that she did not have the event, and we set the event status variable
+status2
to zero. The last line combines the two datasets in
+a list. Note: this last step is a prerequisite from the
+predict()
method for competing risks joint model. That is,
+the datasets provided in the arguments newdata
and
newdata2
need to be named lists with two components. The
first component needs to be named newdataL
and contain the
-dataset with the longitudinal measurements, and the second component
-needs to be named newdataE
and contain the dataset with the
-event information.
+dataset with the longitudinal measurements. The second component needs
+to be named newdataE
and contain the dataset with the event
+information.
The predictions are calculated using the predict()
method. The first call to this function calculates the prediction for
the longitudinal outcomes at the times provided in the
@@ -333,7 +334,7 @@
Dynamic predictionsplot() method to depict the
predictions:
-
+
predLong <- predict(jFit_CR, newdata = ND, return_newdata = TRUE,
times = seq(6.5, 15, length = 25))
@@ -345,7 +346,7 @@ Dynamic predictions= c("#03BF3D4D", "#FF00004D"), pos_ylab_long = c(1.5, 11.5))
legend(x = 8.1, y = 0.45, legend = levels(pbc2.idCR$CR),
lty = 1, lwd = 2, col = c("#03BF3D", "#FF0000"), bty = "n", cex = 0.8)
-
+
diff --git a/docs/articles/Competing_Risks_files/figure-html/CIFs-1.png b/docs/articles/Competing_Risks_files/figure-html/CIFs-1.png
index d5987b1e..25dba47c 100644
Binary files a/docs/articles/Competing_Risks_files/figure-html/CIFs-1.png and b/docs/articles/Competing_Risks_files/figure-html/CIFs-1.png differ
diff --git a/docs/articles/Dynamic_Predictions.html b/docs/articles/Dynamic_Predictions.html
index eae098ca..53c60d5a 100644
--- a/docs/articles/Dynamic_Predictions.html
+++ b/docs/articles/Dynamic_Predictions.html
@@ -135,7 +135,7 @@ Dynamic Predictions
Dimitris
Rizopoulos
- 2024-04-22
+ 2024-05-25
Source: vignettes/Dynamic_Predictions.Rmd
Dynamic_Predictions.Rmd
@@ -165,10 +165,10 @@ Theory
\theta)} \; p\{b_j \mid T_j^* > t, \mathcal Y_j(t), \theta\} \;
p(\theta \mid \mathcal D_n) \; db_j d\theta,
\end{array}\] where \(S(\cdot)\)
-denotes the survival function conditional on the random effects, and
+denotes the survival function conditional on the random effects and
\(\mathcal Y_j(t) = \{\mathcal Y_{1j}(t),
\ldots, \mathcal Y_{Kj}(t)\}\). Combining the three terms in the
-integrand we can device a Monte Carlo scheme to obtain estimates of
+integrand, we can devise a Monte Carlo scheme to obtain estimates of
these probabilities, namely,
Sample a value \(\tilde \theta\)
@@ -177,8 +177,9 @@
Theory
Sample a value \(\tilde b_j\)
from the posterior of the random effects \([b_j \mid T_j^* > t, \mathcal Y_j(t), \tilde
\theta]\).
-Compute the ratio of survival probabilities \(S(u \mid \tilde b_j, \tilde \theta) \Big / S(t
-\mid \tilde b_j, \tilde \theta)\).
+Compute the ratio of survival probabilities \(S(u \mid \tilde b_j,
+\tilde \theta) \Big / S(t \mid \tilde b_j, \tilde
+\theta)\).
Replicating these steps \(L\) times,
we can estimate the conditional cumulative risk probabilities by \[1 - \frac{1}{L} \sum_{l=1}^L \frac{S(u \mid
@@ -193,7 +194,7 @@ Example
We will illustrate the calculation of dynamic predictions using
package JMbayes2 from a trivariate joint model fitted
to the PBC dataset for the longitudinal outcomes serBilir
-(continuous), prothrombin
time (continuous) and
+(continuous), prothrombin
time (continuous), and
ascites
(dichotomous). We start by fitting the univariate
mixed models. For the two continuous outcomes, we allow for nonlinear
subject-specific time effects using natural cubic splines. For
@@ -228,25 +229,25 @@
Example
t0 <- 5
ND <- pbc2[pbc2$id %in% c(25, 93), ]
ND <- ND[ND$year < t0, ]
-ND$status2 <- 0
+ND$event <- 0
ND$years <- t0
-We will only use the first five years of follow-up (line three), and
-further we specify that the patients were event-free up to this time
-point (lines four and five).
+We will only use the first five years of follow-up (line three) and
+specify that the patients were event-free up to this point (lines four
+and five).
We start with predictions for the longitudinal outcomes. These are
produced by the predict()
method for class jm
-objects, and follow the same lines as the procedure described above for
+objects and follow the same lines as the procedure described above for
cumulative risk probabilities. The only difference is in Step 3, where
-instead of calculating the cumulative risk we calculate the predicted
+instead of calculating the cumulative risk, we calculate the predicted
values for the longitudinal outcomes. There are two options controlled
by the type_pred
argument, namely predictions at the scale
of the response/outcome (default) or at the linear predictor level. The
-type
argument controls if the predictions will be for the
-mean subject (i.e., including only the fixed effects) or
-subject-specific including both the fixed and random effects. In the
+type
argument controls whether the predictions will be for
+the mean subject (i.e., including only the fixed effects) or
+subject-specific, including both the fixed and random effects. In the
newdata
argument we provide the available measurements of
-the two patients. This will be used to sample their random effects at
-Step 2 presented above. This is done with a Metropolis-Hastings
+the two patients. This will be used to sample their random effects in
+Step 2, presented above. This is done with a Metropolis-Hastings
algorithm that runs for n_mcmc
iterations; all iterations
but the last one are discarded as burn-in. Finally, argument
n_samples
corresponds to the value of \(L\) defined above and specifies the number
@@ -255,15 +256,15 @@
Example
predLong1 <- predict(jointFit, newdata = ND, return_newdata = TRUE)
Argument return_newdata
specifies that the predictions
are returned as extra columns of the newdata
data.frame. By
-default the 95% credible intervals are also included. Using the
+default, the 95% credible intervals are also included. Using the
plot()
method for objects returned by
predict.jm(..., return_newdata = TRUE)
, we can display the
-predictions. With the following code we do that for the first
+predictions. With the following code, we do that for the first
longitudinal outcome:
plot(predLong1)
-When we want to calculate predictions for other, future time points,
+
When we want to calculate predictions for other future time points,
we can accordingly specify the times
argument. In the
following example, we calculate predictions from time t0
to
time 12:
@@ -292,28 +293,28 @@ Example
plot(predLong2, predSurv)
-Again by default, the plot is for the predictions of the first
-subject (i.e., Patient 25) and for the first longitudinal outcome (i.e.,
+
Again, by default, the plot is for the predictions of the first
+subject (i.e., Patient 25) and the first longitudinal outcome (i.e.,
log(serBilir)
). However, the plot()
method has
-a series of arguments that allows users to customize the plot. We
+a series of arguments that allow users to customize the plot. We
illustrate some of these capabilities with the following figure. First,
we specify that we want to depict all three outcomes using
outcomes = 1:3
(note: a max of three outcomes can be
simultaneously displayed). Next, we specify via the subject
-argument that we want to show the predictions of Patient 93. Note, that
-for serum bilirubin we used the log transformation in the specification
+argument that we want to show the predictions of Patient 93. Note that
+for serum bilirubin, we used the log transformation in the specification
of the linear mixed model. Hence, we receive predictions on the
transformed scale. To show predictions on the original scale, we use the
fun_long
argument. Because we have three outcomes, this
needs to be a list of three functions. The first one, corresponding to
-serum bilirubin is the exp()
and for the other two the
+serum bilirubin, is the exp()
, and for the other two the
identity()
because we do not wish to transform the
predictions. Analogously, we also have the fun_event
argument to transform the predictions for the event outcome, and in the
-example below we set that we want to obtain survival probabilities.
+example below, we set the goal of obtaining survival probabilities.
Using the arguments bg
, col_points
,
col_line_long
, col_line_event
,
-fill_CI_long
, and fill_CI_event
we have
+fill_CI_long
, and fill_CI_event
, we have
changed the appearance of the plot to a dark theme. Finally, the
pos_ylab_long
specifies the relative positive of the y-axis
labels for the three longitudinal outcomes.
@@ -351,66 +352,69 @@ Predictive accuracy#>
#> At time: 8
#> Using information up to time: 5 (202 subjects still at risk)
+#> Accounting for censoring using model-based weights
#>
-#> cut-off SN SP qSN qSP
-#> 1 0.06 0.04245478 1.00000000 0.03287933 1.000000000
-#> 2 0.07 0.06368217 1.00000000 0.04956683 1.000000000
-#> 3 0.09 0.13824070 0.99039566 0.10270423 0.757490419
-#> 4 0.11 0.15946809 0.99039566 0.12027230 0.784435928
-#> 5 0.14 0.17720224 0.98287707 0.12981598 0.685560692
-#> 6 0.15 0.19842963 0.98287707 0.14780414 0.711763968
-#> 7 0.16 0.21965702 0.98287707 0.16598264 0.733935970
-#> 8 0.19 0.24088441 0.98287707 0.18435453 0.752940544
-#> 9 0.22 0.28333920 0.98287707 0.22169095 0.783822976
-#> 10 0.23 0.28333920 0.97642092 0.21748388 0.719825009
-#> 11 0.25 0.30456659 0.97642092 0.23653506 0.735390286
-#> 12 0.26 0.32579398 0.97642092 0.25579444 0.749317113
-#> 13 0.27 0.34702137 0.96996477 0.27126141 0.711089652
-#> 14 0.32 0.37433642 0.96536013 0.29394390 0.695771598
-#> 15 0.36 0.38028575 0.95425727 0.29275549 0.630398775
-#> 16 0.38 0.40151314 0.95425727 0.31310031 0.644614207
-#> 17 0.39 0.41300182 0.95129532 0.32243639 0.635616827
-#> 18 0.43 0.43422921 0.94483917 0.33938902 0.615776271
-#> 19 0.44 0.47668399 0.94483917 0.38181384 0.640564899
-#> 20 0.46 0.47732481 0.93857792 0.37893889 0.612273095
-#> 21 0.49 0.49855220 0.93857792 0.40063635 0.624022395
-#> 22 0.52 0.51977959 0.93857792 0.42259212 0.635080560
-#> 23 0.54 0.52965629 0.93512569 0.43108126 0.625582558
-#> 24 0.58 0.57211107 0.92866954 0.47296608 0.620822291
-#> 25 0.60 0.60273913 0.92507253 0.50465003 0.621616281
-#> 26 0.63 0.60273913 0.90570408 0.49530380 0.557028461
-#> 27 0.64 0.60799325 0.90084593 0.49882681 0.544792672
-#> 28 0.66 0.62922064 0.89438978 0.51988827 0.536233371
-#> 29 0.68 0.62951537 0.88156712 0.51403965 0.501594951
-#> 30 0.69 0.62951537 0.87511097 0.51086344 0.485151330
-#> 31 0.70 0.63284491 0.86321132 0.50883889 0.458209535
-#> 32 0.71 0.63597971 0.85125244 0.50649598 0.433075346
-#> 33 0.72 0.63597971 0.84479629 0.50316150 0.419423165
-#> 34 0.73 0.63597971 0.83188399 0.49635549 0.393581535
-#> 35 0.74 0.65998105 0.82627151 0.52302897 0.394945617
-#> 36 0.76 0.65998105 0.81335921 0.51631107 0.371642681
-#> 37 0.77 0.66418799 0.80172642 0.51547124 0.354011885
-#> 38 0.78 0.68541538 0.79527027 0.53952106 0.353821788
-#> 39 0.79 0.69553457 0.77252334 0.54102972 0.324260513
-#> 40 0.80 0.71836140 0.76655365 0.56900760 0.326340532
-#> 41 0.82 0.72629360 0.74314156 0.56805709 0.298845892
-#> 42 0.83 0.72999422 0.72489863 0.56367067 0.278305494
-#> 43 0.84 0.72999422 0.71198633 0.55657588 0.263559976
-#> 44 0.85 0.73639713 0.67519682 0.54489077 0.228114791
-#> 45 0.86 0.80007930 0.67519682 0.64575455 0.254429059
-#> 46 0.87 0.80724902 0.63864053 0.63948428 0.223461563
-#> 47 0.88 0.82996019 0.61326721 0.66652388 0.210908847
-#> 48 0.89 0.83566180 0.59563285 0.66803683 0.199194476
-#> 49 0.90 0.85819121 0.58311655 0.70468685 0.197995653
-#> 50 0.91 0.85819121 0.55729195 0.69198521 0.179568567
-#> 51 0.92 0.86201058 0.50680438 0.67207221 0.148499865
-#> 52 0.93 0.88498038 0.44922896 0.69021383 0.123970472
-#> 53 0.94 0.88851980 0.41156854 0.67363769 0.106292074
-#> 54 0.95 0.91060251 0.37309178 0.70873720 0.095460992
-#> 55 0.96 0.91195076 0.30248418 0.65125594 0.066899363
-#> 56 0.97 0.93402438 0.19298699 0.59614921 0.035404565
-#> 57 0.98 0.99950031 0.06440953 0.98990626 0.015680861
-#> 58 0.99 1.00000000 0.01291230 1.00000000 0.003041425
In the first line we define the event indicator as we did in the
This function either accepts an object of class To assess the accuracy of the predictions we produce a calibration
+ To assess the accuracy of the predictions, we produce a calibration
plot: The ICI is the mean absolute difference between the observed and
predicted probabilities, E50 is the median absolute difference, and E90
is the 90% percentile of the absolute differences. Finally, we calculate
@@ -460,7 +465,7 @@ Our aim is to assess the strength of the association between the risk
-of the composite event and the levels of serum bilirubin that has been
-collected during follow-up. We will describe the patient-specific
-profiles over time for this biomarker using a linear mixed model, with
-fixed-effects, time, sex, and their interaction, and as random effects
-random intercepts and random slopes. The syntax to fit this model with
- We aim to assess the strength of the association between the risk of
+the composite event and the serum bilirubin levels collected during
+follow-up. We will describe the patient-specific profiles over time for
+this biomarker using a linear mixed model, with fixed effects, time,
+sex, and their interaction, and as random effects, random intercepts,
+and random slopes. The syntax to fit this model with The joint model that links the survival and longitudinal submodels is
@@ -227,7 +227,7 @@ pbc2.id
data.frame. The cut-point with the asterisk on the
right maximizes the Youden’s
@@ -424,16 +428,17 @@ Predictive accuracy#>
#> Time-dependent AUC for the Joint Model jointFit
#>
-#> Estimated AUC: 0.8088
+#> Estimated AUC: 0.8282
#> At time: 8
-#> Using information up to time: 5 (202 subjects still at risk)
+#> Using information up to time: 5 (202 subjects still at risk)
+#> Accounting for censoring using model-based weights
tvROC
or
of class jm
. In the latter case, the user must also provide
the newdata
, Tstart
and Dt
or
Thoriz
arguments. Here we have used the same dataset as the
one to fit the model, but, in principle, discrimination could be
(better) assessed in another dataset.
calibration_plot(jointFit, newdata = pbc2, Tstart = t0, Dt = 3)
Predictive accuracy
+#> 0.06123482 0.05621041 0.11553538
calibration_metrics(jointFit, pbc2, Tstart = 5, Dt = 3)
#> ICI E50 E90
-#> 0.05763675 0.04720823 0.12893676
Predictive accuracy#>
#> Prediction Error for the Joint Model 'jointFit'
#>
-#> Estimated Brier score: 0.1268
+#> Estimated Brier score: 0.1242
#> At time: 8
#> For the 202 subjects at risk at time 5
#> Number of subjects with an event in [5, 8): 40
@@ -492,7 +497,7 @@
Predictive accuracy#>
#> Prediction Error for the Joint Model 'jointFit'
#>
-#> Estimated Integrated Brier score: 0.0844
+#> Estimated Integrated Brier score: 0.0834
#> In the time interval: [5, 8)
#> For the 202 subjects at risk at time 5
#> Number of subjects with an event in [5, 8): 40
@@ -502,17 +507,17 @@
Predictive accuracyDimitris
Rizopoulos
-
2024-04-22
+ 2024-05-25
Source: vignettes/JMbayes2.Rmd
JMbayes2.Rmd
Univariate
-pbc2.id$status2 <- as.numeric(pbc2.id$status != 'alive')
CoxFit <- coxph(Surv(years, status2) ~ sex, data = pbc2.id)
lme()
is:lme()
+is:
fm1 <- lme(log(serBilir) ~ year * sex, data = pbc2, random = ~ year | id)
Univariatejm() adds the subject-specific linear
predictor of the mixed model as a time-varying covariate in the survival
-relative risk model. In the output this is named as
+relative risk model. In the output, this is named as
value(log(serBilir))
to denote that, by default, the
current value functional form is used. That is, we assume that the
instantaneous risk of an event at a specific time \(t\) is associated with the value of the
@@ -252,9 +252,9 @@ Multivariatejm(). In the following example, we extend the joint model
we fitted above by also including the prothrombin time and the log odds
-of the presence or not of ascites as time-varying covariates in the
+of the presence or absence of ascites as time-varying covariates in the
relative risk model for the composite event. Ascites is a dichotomous
-outcome and therefore we fit a mixed-effects logistic regression model
+outcome, and therefore, we fit a mixed-effects logistic regression model
for it using the
mixed_model()
function from the
GLMMadaptive package. The use of ||
in the
random
argument of mixed_model()
specifies
@@ -266,7 +266,7 @@ Multivariate. Because this
joint model is more complex, we increase the number of MCMC iterations,
-the number of burn-in iterations and the thinning per chain, using the
+the number of burn-in iterations, and the thinning per chain using the
corresponding control arguments:
The output for the survival submodel contains now the estimated
-coefficients for value(prothrombin)
and
-value(ascites)
, and parameter estimates for all three
-longitudinal submodels.
The survival submodel output now contains the estimated coefficients
+for value(prothrombin)
and value(ascites)
, as
+well as parameter estimates for all three longitudinal submodels.
jm()
includes
the subject-specific linear predictors of the mixed-effects models as
time-varying covariates in the relative risk model. However, this is
-just one of the many possibilities we have to link the longitudinal and
-survival outcomes. The argument functional_forms
of
+just one of the many possibilities for linking longitudinal and survival
+outcomes. The argument functional_forms
of
jm()
provides additional options. Based on previous
-experience, two extra functional forms are provided, namely, the
-time-varying slope and the time-varying normalized
-area/cumulative-effect. The time-varying slope is the first order
-derivative of the subject-specific linear predictor of the mixed-effect
-model with respect to the (follow-up) time variable. The time-varying
-normalized area/cumulative-effect is the integral of the
-subject-specific linear predictor of the mixed-effect model from zero to
-the current (follow-up) time \(t\)
-divided by \(t\). The integral is the
-area under the subject-specific longitudinal profile; by dividing the
-integral by \(t\) we obtain the average
-of the subject-specific longitudinal profile over the corresponding
-period \((0, t)\).
+experience, two extra functional forms are provided: the time-varying
+slope and the time-varying normalized area/cumulative effect.
+The time-varying slope is the first-order derivative of the
+subject-specific linear predictor of the mixed-effect model with respect
+to the (follow-up) time variable. The time-varying normalized
+area/cumulative effect is the integral of the subject-specific linear
+predictor of the mixed-effect model from zero to the current (follow-up)
+time \(t\) divided by \(t\). The integral is the area under the
+subject-specific longitudinal profile; by dividing the integral by \(t\), we obtain the average of the
+subject-specific longitudinal profile over the corresponding period
+\((0, t)\).
To illustrate how the functional_forms
argument can be
used to specify these functional forms, we update the joint model
jointFit2
by including the time-varying slope of log serum
-bilirubin instead of the value, and also the interaction of this slope
-with sex, and for prothrombin we include the normalized cumulative
-effect. For ascites, we keep the value functional form. The
+bilirubin instead of the value and also the interaction of this slope
+with sex and for prothrombin we include the normalized cumulative
+effect. For ascites, we keep the current value functional form. The
corresponding syntax to fit this model is:
fForms <- list(
@@ -395,61 +392,61 @@ Functional forms#> ascites: 1885
#>
#> DIC WAIC LPML
-#> marginal 11666.74 12712.68 -6982.086
-#> conditional 12731.24 12458.34 -6763.103
+#> marginal 11692.23 12995.55 -7306.362
+#> conditional 12656.91 12381.64 -6669.114
#>
#> Random-effects covariance matrix:
#>
#> StdDev Corr
-#> (Intr) 0.9945 (Intr) year (Intr) year (Intr)
-#> year 0.1861 0.4621
-#> (Intr) 0.7573
-#> year 0.3257 -0.0166
-#> (Intr) 2.6363 0.5455 0.4677 0.3328 -0.0735
-#> year 0.4496 0.4475 0.6749 -0.0672 0.3544
+#> (Intr) 0.9989 (Intr) year (Intr) year (Intr)
+#> year 0.1853 0.4548
+#> (Intr) 0.7500
+#> year 0.3232 -0.0047
+#> (Intr) 2.5559 0.5529 0.4692 0.3487 -0.0775
+#> year 0.4361 0.4303 0.6690 -0.0720 0.3773
#>
#> Survival Outcome:
-#> Mean StDev 2.5% 97.5% P Rhat
-#> sexfemale 0.2641 0.9399 -1.5059 2.1004 0.7927 1.0628
-#> slope(log(serBilir)) 4.4155 2.2522 0.1458 8.8060 0.0427 1.0502
-#> slope(log(serBilir)):sexfemale -3.7942 2.8217 -9.3871 1.2246 0.1647 1.1375
-#> area(prothrombin) -0.2819 0.2510 -0.7735 0.1738 0.2777 1.7284
-#> value(ascites) 0.9501 0.1967 0.6093 1.3780 0.0000 1.4029
+#> Mean StDev 2.5% 97.5% P Rhat
+#> sexfemale 0.3595 0.9644 -1.3670 2.3829 0.7557 1.0662
+#> slope(log(serBilir)) 4.3938 2.5197 -0.0624 9.5851 0.0577 1.1294
+#> slope(log(serBilir)):sexfemale -4.3880 3.0045 -11.1525 0.5844 0.1020 1.1396
+#> area(prothrombin) -0.4097 0.3097 -0.9998 0.1627 0.1957 1.3476
+#> value(ascites) 1.0639 0.2373 0.6257 1.5698 0.0000 1.3015
#>
#> Longitudinal Outcome: log(serBilir) (family = gaussian, link = identity)
#> Mean StDev 2.5% 97.5% P Rhat
-#> (Intercept) 0.6713 0.1647 0.3556 0.9981 0.0000 1.0006
-#> year 0.2662 0.0334 0.2025 0.3332 0.0000 1.0065
-#> sexfemale -0.2084 0.1757 -0.5552 0.1283 0.2370 1.0003
-#> year:sexfemale -0.0739 0.0344 -0.1425 -0.0074 0.0287 1.0063
-#> sigma 0.3483 0.0067 0.3354 0.3620 0.0000 1.0029
+#> (Intercept) 0.6684 0.1660 0.3397 0.9945 0.0003 1.0079
+#> year 0.2658 0.0336 0.2025 0.3346 0.0000 1.0005
+#> sexfemale -0.2049 0.1768 -0.5497 0.1392 0.2433 1.0087
+#> year:sexfemale -0.0745 0.0348 -0.1450 -0.0079 0.0270 1.0008
+#> sigma 0.3483 0.0066 0.3352 0.3615 0.0000 1.0046
#>
#> Longitudinal Outcome: prothrombin (family = gaussian, link = identity)
#> Mean StDev 2.5% 97.5% P Rhat
-#> (Intercept) 10.9891 0.1682 10.6545 11.3202 0.0000 1.0024
-#> year 0.1919 0.0768 0.0475 0.3468 0.0107 1.0144
-#> sexfemale -0.4481 0.1792 -0.7986 -0.0929 0.0153 1.0017
-#> year:sexfemale 0.0625 0.0809 -0.1007 0.2208 0.4293 1.0134
-#> sigma 1.0581 0.0203 1.0187 1.0983 0.0000 1.0133
+#> (Intercept) 10.9993 0.1684 10.6565 11.3294 0.0000 1.0003
+#> year 0.1839 0.0764 0.0325 0.3362 0.0140 1.0056
+#> sexfemale -0.4574 0.1786 -0.7983 -0.1039 0.0140 1.0006
+#> year:sexfemale 0.0702 0.0806 -0.0884 0.2279 0.3857 1.0058
+#> sigma 1.0591 0.0203 1.0197 1.0994 0.0000 1.0152
#>
#> Longitudinal Outcome: ascites (family = binomial, link = logit)
#> Mean StDev 2.5% 97.5% P Rhat
-#> (Intercept) -4.4620 0.6429 -5.7500 -3.2232 0.0000 1.0493
-#> year 0.6412 0.0779 0.4904 0.8015 0.0000 1.2321
-#> sexfemale -0.4484 0.6183 -1.6790 0.7589 0.4547 1.0035
+#> (Intercept) -4.4105 0.6235 -5.7030 -3.2274 0.0000 1.0378
+#> year 0.6304 0.0777 0.4830 0.7938 0.0000 1.2194
+#> sexfemale -0.4225 0.6122 -1.6453 0.7616 0.4833 1.0057
#>
#> MCMC summary:
#> chains: 3
#> iterations per chain: 12000
#> burn-in per chain: 2000
#> thinning: 5
-#> time: 1.9 min
As seen above, the functional_forms
argument is a named
list with elements corresponding to the longitudinal outcomes. If a
-longitudinal outcome is not specified in this list, then the default,
-value functional form, is used for that outcome. Each element of the
-list should be a one-sided R formula in which the functions
-value()
, slope()
and area()
can
+longitudinal outcome is not specified in this list, then the default
+value functional form is used for that outcome. Each element of the list
+should be a one-sided R formula in which the functions
+value()
, slope()
, and area()
can
be used. Interaction terms between the functional forms and other
(baseline) covariates are also allowed.
When multiple longitudinal outcomes are considered with possibly
different functional forms per outcome, we require to fit a relative
risk model containing several terms. Moreover, it is often of scientific
-interest to select which terms/functional-forms per longitudinal outcome
+interest to select which terms/functional forms per longitudinal outcome
are more strongly associated with the risk of the event of interest. To
-facilitate this selection, jm()
provides the option to
-penalize the regression coefficients using shrinkage priors. As an
-example, we refit jointFit3
by assuming a Horseshoe prior
-for the alphas
coefficients (i.e., the coefficients of the
+facilitate this selection, jm()
allows penalizing the
+regression coefficients using shrinkage priors. As an example, we refit
+jointFit3
by assuming a Horseshoe prior for the
+alphas
coefficients (i.e., the coefficients of the
longitudinal outcomes in the relative risk model):
jointFit4 <- update(jointFit3, priors = list("penalty_alphas" = "horseshoe"))
cbind("un-penalized" = unlist(coef(jointFit3)),
"penalized" = unlist(coef(jointFit4)))
#> un-penalized penalized
-#> gammas.Mean 0.2640842 -0.5163933
-#> association.slope(log(serBilir)) 4.4155400 2.2405660
-#> association.slope(log(serBilir)):sexfemale -3.7942189 -0.7917386
-#> association.area(prothrombin) -0.2818671 -0.1076875
-#> association.value(ascites) 0.9500576 0.8039421
Apart from the Horseshoe prior, the ridge prior is also provided.
diff --git a/docs/articles/JMbayes2_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/JMbayes2_files/figure-html/unnamed-chunk-5-1.png index b58ec5ea..3a6a671c 100644 Binary files a/docs/articles/JMbayes2_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/JMbayes2_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/Multi_State_Processes.html b/docs/articles/Multi_State_Processes.html index 2a1e78c8..bb1c6839 100644 --- a/docs/articles/Multi_State_Processes.html +++ b/docs/articles/Multi_State_Processes.html @@ -135,7 +135,7 @@vignettes/Multi_State_Processes.Rmd
Multi_State_Processes.Rmd
It is often the case that a subject may transition between multiple -states and we are interested to assess the association of longitudinal -marker(s) with each of these transitions. In this vignette we will -illustrate how to achieve this using JMbayes2.
+A subject may often transition between multiple states, and we are +interested in assessing the association of longitudinal marker(s) with +each of these transitions. In this vignette, we will illustrate how to +achieve this using JMbayes2.
We will consider a simple case with one longitudinal outcome and a three-state (illness-death) model, but this application can be extended for the cases of multiple longitudinal markers and more than three @@ -162,24 +162,23 @@
First we will simulate data from a joint model with a single linear +
First, we will simulate data from a joint model with a single linear mixed effects model and a multi-state process with three possible -states. The multi-state process can be visualized as:
- -where all subjects start from state “Healthy” and then can transition -to either state “Illness” and then state “Death” or directly to state -“Death”. In this case, states “Healthy” and “Illness” are +states. The multi-state process can be visualized as: +
+where all subjects start from the state “Healthy” and then can +transition to either state “Illness” and then state “Death” or directly +to state “Death.” In this case, states “Healthy” and “Illness” are transient states as the subject, when occupying these states, -can still transition to other states whereas “Death” is an absorbing +can still transition to other states, whereas “Death” is an absorbing state as when a subject reaches this state then no further transitions can occur. This means that three transitions are possible: \(1 \rightarrow 2\), \(1 \rightarrow 3\) and \(2 \rightarrow 3\) with transition intensities \(h_{12}\left(t\right)\), \(h_{13}\left(t\right)\) and \(h_{23}\left(t\right)\) respectively.
-For our example the default functional form is assumed, i.e., that +
For our example, the default functional form is assumed, i.e., that the linear predictor \(\eta(t)\) of the -mixed model is associated with the each transition intensity at time -\(t\). The following piece of code -simulates the data:
+mixed model is associated with each transition intensity at time \(t\). The following piece of code simulates +the data:
set.seed(1234)
# number of subjects
@@ -354,38 +353,38 @@ Data
#> 1.3 1 2 3 3 4.392984 9.094447 0 1.984455
#> 2.1 2 1 2 1 0.000000 1.552122 0 7.673231
#> 2.2 2 1 3 2 0.000000 1.552122 0 7.673231
for example subject 1 experienced the following transition: \(1 \rightarrow 2\) and therefore is +
for example, subject 1 experienced the following transition: \(1 \rightarrow 2\) and therefore is
represented in 3 rows, one for each transition, because all of these
-transitions were plausible. On the other hand subject 2 is only
+transitions were plausible. On the other hand, subject 2 is only
represented by two rows, only for transitions \(1 \rightarrow 2\) and \(1 \rightarrow 3\) since these are the only
-transitions that are possible from state 1. That is, since subject 2
-never actually transitioned to state 2, transition \(2 \rightarrow 3\) was never possible and
-therefore no row for this transition is in the dataset. It is also
+transitions possible from state 1. That is since subject 2 never
+actually transitioned to state 2, transition \(2 \rightarrow 3\) was never possible, and
+therefore, no row for this transition is in the dataset. It is also
important to note that the time in the dataset follows the counting
process formulation with intervals specified by Tstart
and
Tstop
and that there is a variable (in this case
-transition
) which indicates to which transition the row
-corresponds to.
transition
) indicating which transition the row corresponds
+to.
As soon data in the appropriate format are available, fitting the +
When the data in the appropriate format are available, fitting the
model is very straightforward. First we fit a linear mixed model using
the lme()
function from package nlme:
-mixedmodel <- lme(y ~ time + X, random = ~ time | id, data = DF)
then we fit a multi-state model using function coxph()
+mixedmodel <- lme(y ~ time + X, data = DF, random = ~ time | id)
Then, we fit a multi-state model using function coxph()
from package survival making sure we use the counting
process specification and that we add strata(transition)
to
stratify by the transition indicator variable in the dataset.
-Furthermore we add an interaction between covariate X
and
+Furthermore, we add an interaction between covariate X
and
each transition to allow the effect of this covariate to vary across
transitions.
msmodel <- coxph(Surv(Tstart, Tstop, status) ~ X * strata(transition),
data = data_mstate)
Finally, to fit the joint model we simply run:
+Finally, to fit the joint model, we simply run:
jm_ms_model <- jm(msmodel, mixedmodel, time_var = "time",
functional_forms = ~ value(y):transition, n_iter = 10000L)
@@ -432,12 +431,12 @@ Fitting the model#> iterations per chain: 10000
#> burn-in per chain: 500
#> thinning: 1
-#> time: 2.8 min
which differs from a default call to jm()
by the
-addition of the functional_forms
argument by which we
-specify that we want an “interaction” between the value of the marker
-and each transition which translates into a separate association
-parameter for the longitudinal marker and each transition.
functional_forms
argument specifying that
+we want an “interaction” between the marker’s value and each transition,
+which translates into a separate association parameter for the
+longitudinal marker and each transition.
diff --git a/docs/articles/Multi_State_Processes_files/figure-html/ms_figure-1.png b/docs/articles/Multi_State_Processes_files/figure-html/ms_figure-1.png
index fe2b6e37..03404b7a 100644
Binary files a/docs/articles/Multi_State_Processes_files/figure-html/ms_figure-1.png and b/docs/articles/Multi_State_Processes_files/figure-html/ms_figure-1.png differ
diff --git a/docs/articles/Non_Gaussian_Mixed_Models.html b/docs/articles/Non_Gaussian_Mixed_Models.html
index 379c7e3b..6bbf0577 100644
--- a/docs/articles/Non_Gaussian_Mixed_Models.html
+++ b/docs/articles/Non_Gaussian_Mixed_Models.html
@@ -135,7 +135,7 @@ vignettes/Non_Gaussian_Mixed_Models.Rmd
Non_Gaussian_Mixed_Models.Rmd
With very few exceptions, continuous outcomes that we wish to analyze -have some natural bounds. For example, the levels of blood biomarker for -a set of patients. However, often, the majority of the observations are -located far away from these natural bounds, and an assumption of a -normal distribution for the outcome can be safely done. In some settings +have some natural bounds. For example, the levels of blood biomarkers +for a set of patients. However, most observations are often located far +away from these natural bounds, and an assumption of a normal +distribution for the outcome can be safely made. In some settings, though, we can have outcomes for which a substantial percentage of the observations are located near the boundaries leading to skewed or -U-shaped distributions. For such longitudinal outcomes, a linear mixed -model with normal error terms often does not provide a good fit. A +U-shaped distributions. A linear mixed model with normal error terms +often does not provide a good fit for such longitudinal outcomes. A natural alternative is to select a distribution that respects the bounded nature of the outcome. The most well-known distribution for such outcomes is the Beta distribution defined in the \((0, 1)\) interval (note: a bounded outcome \(Y^*\) in the \((a, b)\) interval can be transformed to the \(Y = (Y^* - a) / (b - a)\) in the \((0, 1)\) interval).
-The following piece of code illustrates how to simulate data from a -joint model with a Beta mixed effects model. The default functional form -is assumed, i.e., that the linear predictor \(\eta(t)\) of the mixed model is associated +
The following code illustrates how to simulate data from a joint
+model with a Beta mixed effects model. The default functional form is
+assumed, i.e., that the linear predictor \(\eta(t)\) of the mixed model is associated
with the hazard of an event at time \(t\). The linear predictor is related to the
mean \(\mu(t)\) of the Beta
distribution under the logit link function, i.e., \(\log[\mu(t) / \{1 - \mu(t)\}] =
@@ -320,13 +320,13 @@ Outlying observations is a common occurring issue in practice.
-Several methods have been proposed in the literature for identifying
-such observations in the context of longitudinal data. However, removing
-such values from the analysis is in general not recommended unless we
-also have external information why these values are outlying. Hence, in
-these settings we would need to fit mixed models to accommodate these
-observations. A well-known approach to achieve this is to replace the
-normal distribution for the error terms in the linear mixed model with a
-Student’s-t distribution that has heavier tails. Outlying observations are a common issue in practice. Several methods
+have been proposed in the literature for identifying such observations
+in the context of longitudinal data. However, removing such values from
+the analysis is generally not recommended unless we also have external
+information as to why these values are outlying. Hence, we would need to
+fit mixed models to accommodate these observations in these settings. A
+well-known approach to achieve this is replacing the normal distribution
+for the error terms in the linear mixed model with a Student’s-t
+distribution with heavier tails. The following syntax simulates data from a joint model with a
Student’s-t mixed effects model: Count longitudinal outcomes are typically modeled with the Poisson
-distribution. However, often these outcomes exhibit more variance than
-what is allowed from the Poisson distribution leading to the well-known
-problem of over-dispersion. To accommodate this over-dispersion
-typically the Negative Binomial distribution is used. The following piece of code simulates data from a joint model for
-count longitudinal data that follow the Negative Binomial
+count longitudinal data that follow the negative binomial
distribution: As for count data also for Binomial data we may have the
-over-dispersion problem. In this case, we can change to standard
-Binomial distribution to a Beta-Binomial one to accommodate this. For count data and binomial data, we may have an over-dispersion
+problem. To accommodate this, we can change the standard binomial
+distribution to a beta-binomial one. The following piece of code simulates data from a joint model for
-binomial longitudinal data that follow the Beta-Binomial
+binomial longitudinal data that follow the beta-binomial
distribution: JMbayes2 provides also the capability to fit joint
-models with a recurrent event process possibly combined with a
+ JMbayes2 also provides the capability to fit joint
+models with a recurrent event process, possibly combined with a
terminating event. Recurrent events are correlated events that may occur
more than once over the follow-up period for a given subject. Our
current implementation allows for multiple longitudinal markers with
-different distributions, and various functional forms to link these
+different distributions and various functional forms to link these
markers with the risk of recurrent and terminal events. Furthermore, it
enables the risk intervals to be defined in terms of the gap or
calendar timescales. The two timescales use a different
@@ -164,9 +164,9 @@ A joint model with \(p\) normally
@@ -213,9 +213,9 @@ Censored linear mixed models
. For the
-longitudinal outcomes we specify linear mixed-effects models, and for
-the terminal and recurrence processes, we use proportional hazard
+
diff --git a/docs/articles/Recurring_Events.html b/docs/articles/Recurring_Events.html
index 6550329d..f6895fe7 100644
--- a/docs/articles/Recurring_Events.html
+++ b/docs/articles/Recurring_Events.html
@@ -135,7 +135,7 @@ set.seed(1234)
@@ -459,15 +459,15 @@
Censored linear mixed models
Students’s-t mixed models
-
@@ -602,12 +602,12 @@
Students’s-t mixed modelsNegative binomial mixed models
set.seed(1234)
@@ -734,7 +734,7 @@
Negative binomial mixed models#> iterations per chain: 3500
#> burn-in per chain: 500
#> thinning: 1
-#> time: 25 sec
Negative binomial mixed models
Beta-binomial longitudinal outcomes
-
set.seed(1234)
@@ -876,10 +876,7 @@
Beta-binomial longitudinal outcomes
#> iterations per chain: 3500
#> burn-in per chain: 500
#> thinning: 1
-#> time: 48 sec
Recurrent Events
Pedro Miranda
Afonso
- 2024-04-22
+ 2024-05-25
Source: vignettes/Recurring_Events.Rmd
Recurring_Events.Rmd
Recurrent events
Introduction
-Introduction
-
Introduction
-
@@ -175,10 +175,10 @@ Introduction\(i = 1, \ldots, n\)
where \(i = 1, \ldots, n\). We +specify linear mixed-effects models for the longitudinal outcomes, and +for the terminal and recurrence processes, we use proportional hazard models. The longitudinal and event time processes are linked via a latent structure of random effects, highlighted by the same color in the equations above. The terms \(\mathcal{f}_{R_j}\left\{\eta_{j_i}(t)\right\}\) @@ -224,7 +224,7 @@
We simulate data from a joint model with three outcomes: one -longitudinal outcome, one terminal failure-time, and one recurrent -failure-time. We assume that the underlying value of the longitudinal +longitudinal outcome, one terminal failure time, and one recurrent +failure time. We assume that the underlying value of the longitudinal outcome is associated with both risk models and use the gap timescale. The reader can easily extend this example to accommodate -multiple longitudinal markers with other forms of association, include -competing risks, consider only the recurrent events process, or use a -different timescale.
+multiple longitudinal markers with other forms of association, including +competing risks, considering only the recurrent events process, or using +a different timescale.
gen_data <- function(){
n <- 500 # desired number of subjects
@@ -397,11 +397,11 @@ Data
recu_data <- fake_data$rec # recurrent events data
lme_data <- fake_data$long # longitudial marker data
We now have three data frames, each one corresponding to a different
-outcome. To fit the joint model, the user needs to organize the
-failure-time data in the counting process formulation by combining the
-data for the terminal and recurrent events. Using then a strata variable
-to distinguish between the two processes. To facilitate this, we provide
-in the package the rc_setup()
function:
rc_setup()
function:
cox_data <- rc_setup(rc_data = recu_data, trm_data = term_data,
idVar = "id", statusVar = "status",
@@ -412,12 +412,12 @@ Data
their recurrent risk periods plus one for the terminal event. The data
frame follows the counting process formulation with the risk intervals
delimited by start
and stop
variables. The
-strata
variable denotes if the type of event,
-1
if recurrent or 2
terminal. The
-status
equals 1
if the subject had an event
-and 0
otherwise. As shown below and in Figure 3, the
-subject 1 experienced seven recurrent events during the follow up; the
-eighth recurrent event was censored by the terminal event.
+strata
variable denotes the type of event, 1
+if recurrent, or 2
terminal. The status
equals
+1
if the subject had an event and 0
otherwise.
+As shown below and in Figure 3, subject 1 experienced seven recurrent
+events during the follow-up; the terminal event censored the eighth
+recurrent event.
cox_data[cox_data$id == 1, c("id", "tstart", "tstop", "status", "strata")]
#> id tstart tstop status strata
@@ -431,19 +431,19 @@ Data
#> 8 1 3.6251219 4.0375415 0 R
#> 9 1 0.0000000 4.0375415 1 T1
-
The user then needs to use the nlme::lme()
function to
-first fit the linear mixed model that describes the longitudinal
+
The user then needs to use the nlme::lme()
function
+first to fit the linear mixed model that describes the longitudinal
outcome,
lme_fit <- lme(y ~ ns(time, k = c(1, 3), B = c(0, 7)), @@ -459,7 +459,7 @@
function. The user specifies the desired functional forms for the mixed model in each relative-risk model. And with theFitting the modeljm()
recurrent
-argument specifies the desired timescale, +argument specifying the desired timescale,jm_fit <- jm(cox_fit, lme_fit, time_var = "time", recurrent = "gap", functional_forms = ~ value(y):strata) diff --git a/docs/articles/Super_Learning.html b/docs/articles/Super_Learning.html index 30d696f7..9f21373d 100644 --- a/docs/articles/Super_Learning.html +++ b/docs/articles/Super_Learning.html @@ -136,7 +136,7 @@
Combined Dynamic Predictions via Super
Dimitris Rizopoulos
-2024-04-22
+2024-05-25
Source:vignettes/Super_Learning.Rmd
@@ -159,10 +159,10 @@Super_Learning.Rmd
Motivation and TheoryMotivation and Theory\(\mathcal{L} = \{M_1, \ldots, M_L\}\) denote a library with \(L\) models. -There are no restrictions to the models included in this library, and -actually, it is recommended to consider a wide range of possible models. -Among others, these joint models differ in the specification of the time -trend in the longitudinal submodels and the functions form and event -submodel. We split the original dataset \(\mathcal{D}_n\) in \(V\) folds. The choice of \(V\) will depend on the size and number of +There are no restrictions to the models included in this library, and it +is recommended to consider a wide range of possible models. Among +others, these joint models differ in the specification of the time trend +in the longitudinal submodels and the functions form and event submodel. +We split the original dataset \(\mathcal{D}_n\) in \(V\) folds. The choice of \(V\) will depend on the size and number of events in \(\mathcal{D}_n\). In particular, for each fold, we need to have a sufficient number of events -to robustly quantify the predictive performance. Using the +to quantify the predictive performance robustly. Using the cross-validation method, we fit the \(L\) models in the combined \(v-1\) folds, and we will calculate predictions for the \(v\)-th fold we left outside. Due to the dynamic nature of the predictions, we want to @@ -283,8 +283,8 @@
Example JMbayes2 in each worker. The output of this function should be a list of the fitted joint models with class
"jmList"
. Assigning this class to the resulting list will -facilitate combining the predictions later. For our example we use the -following specification: +facilitate combining the predictions later. For our example, we use the +following specifications:fit_models <- function (data) { library("JMbayes2") @@ -315,18 +315,18 @@
Example the longitudinal outcome
serBilir
and the composite event transplantation or death. The first three models consider a simple linear mixed effects model for serum bilirubin with random intercepts -and random slopes per subject, and no other covariates. Also, in the Cox -model for the composite event we do not specify any baseline covariates; -hence, the risk of the composite event depends only on serum bilirubin. -The three models differ in the corresponding functional forms, i.e., the -current value of log serum bilirubin, the current slope/velocity of log -serum bilirubin, and the current value plus the area under the log serum -bilurubin trajectory. The last models consider a more elaborate -specification of the linear mixed model that includes natural cubic -splines in both the fixed and random effects to allow for -non-linearities in the log serum bilirubin trajectories, and the main +and random slopes per subject and no other covariates. Also, in the Cox +model for the composite event, we do not specify any baseline +covariates; hence, the risk of the composite event depends only on serum +bilirubin. The three models differ in the corresponding functional +forms, i.e., the current value of log serum bilirubin, the current +slope/velocity of log serum bilirubin, and the current value plus the +area under the log serum bilirubin trajectory. The last models consider +a more elaborate specification of the linear mixed model that includes +natural cubic splines in both the fixed and random effects to allow for +non-linearities in the log serum bilirubin trajectories and the main effects of sex and age in both the mixed and Cox models. The functional -forms are again the current value and the current slope. +forms are, again, the current value and the current slope.We fit these models in the training datasets using parallel computing as facilitated using the parallel package (note: this and the subsequent computations require some time @@ -338,12 +338,12 @@
Example Models_folds <- parallel::parLapply(cl, CVdats$training, fit_models) parallel::stopCluster(cl)
We calculate the weights to combine the predictions from these five -models to optimize the integrated Brier score the expected predictive -cross-entropy at follow-up time \(t = -6\) years and for a relevant window of \(\Delta t = 2\) years. Function -
tvBrier()
automatically performs this task by providing as -a first argument the list of joint models fitted in the training -datasets. The integrated Brier score is calculated using the testing +models to optimize the integrated Brier score and the expected +predictive cross-entropy at follow-up time \(t += 6\) years and for a relevant window of \(\Delta t = 2\) years. The function +tvBrier()
automatically performs this task by providing the +list of joint models fitted in the training datasets as a first +argument. The integrated Brier score is calculated using the testing datasets that are provided in thenewdata
argument:We observe that the first two models dominate the weights. We also note that the integrated Brier score based on the combined predictions is smaller than the integrated Brier score of each individual model. To -calculate the model weights using the expected predictive cross-entropy +calculate the model weights using the expected predictive cross-entropy, use function
-tvEPCE()
with an almost identical call as for the Brier score:The EPCE results are similar with the ones from the integrated Brier +
The EPCE results are similar to those from the integrated Brier score; however, now only models
-M2
andM3
-share the weights. Again we see that the EPCE based on the combined +share the weights. Again, we see that the EPCE based on the combined cross-validated predictions is smaller than the EPCE based on the cross-validated predictions of each individual model.To put these weights in practice we need first to refit the five -joint models we considered in the original dataset.
+To use these weights in practice, we must first refit the five joint +models we considered in the original dataset.
-Models <- fit_models(pbc2)
Then we construct the dataset with the subjects at risk at year six +
Then, we construct the dataset with the subjects at risk at year six and consider the longitudinal measurements collected before this follow-up time. Also, we set in this dataset the observed event time to -six, and the event variable to zero, i.e., indicating that patients were +six and the event variable to zero, i.e., indicating that patients were event-free up to this time:
-ND <- pbc2[pbc2$years > tstr & pbc2$year <= tstr, ] diff --git a/docs/articles/Super_Learning_files/figure-html/unnamed-chunk-1-1.png b/docs/articles/Super_Learning_files/figure-html/unnamed-chunk-1-1.png index 752c4338..af0a6214 100644 Binary files a/docs/articles/Super_Learning_files/figure-html/unnamed-chunk-1-1.png and b/docs/articles/Super_Learning_files/figure-html/unnamed-chunk-1-1.png differ diff --git a/docs/articles/Time_Varying_Effects.html b/docs/articles/Time_Varying_Effects.html index 07f370c5..fefe3f27 100644 --- a/docs/articles/Time_Varying_Effects.html +++ b/docs/articles/Time_Varying_Effects.html @@ -135,7 +135,7 @@
Time Varying Effects
Dimitris Rizopoulos
-2024-04-22
+2024-05-25
Source:vignettes/Time_Varying_Effects.Rmd
@@ -160,14 +160,13 @@Time_Varying_Effects.Rmd
Non Proportional Hazards
pbc2.id$status2 <- as.numeric(pbc2.id$status != 'alive') CoxFit <- coxph(Surv(years, status2) ~ sex, data = pbc2.id)
Our aim is to assess the strength of the association between the risk -of the composite event and the level of serum bilirubin. We will -describe the patient-specific profiles over time for this biomarker -using a linear mixed-effects model, where both in the fixed- and -random-effects we include an intercept, the linear and quadratic time -effect. In the fixed-effects we also include the interaction of the time -effect and sex. The syntax to fit this model with
+lme()
-is:We aim to assess the strength of the association between the risk of +the composite event and the serum bilirubin level. We will describe the +patient-specific profiles over time for this biomarker using a linear +mixed-effects model, where we include an intercept in both the fixed and +random effects, as well as the linear and quadratic time effects. In the +fixed effects, we also include the interaction of the time effect and +sex. The syntax to fit this model with
lme()
is:@@ -219,13 +218,13 @@fm <- lme(log(serBilir) ~ poly(year, 2) * sex, data = pbc2, random = ~ poly(year, 2) | id, control = lmeControl(opt = 'optim'))
Non Proportional Hazards#> thinning: 1 #> time: 17 sec
To specify that the association of serum bilirubin may change over -time we include an interaction of this time-varying covariate with a +time, we include an interaction of this time-varying covariate with a natural cubic spline of time using function
+splines package. Important Note: For +this to work correctly, we need to explicitly specify the internal and +boundary knots for the B-splines basis, i.e., in the following example, +we set the internal knots at 3, 6, and 9 years, and the boundary knots +at 0 and 14.5 years:ns()
from the -splines package. Important Note: for -this to work correctly we need to explicitly specify the internal and -boundary knots for the B-splines basis, i.e., in the following example -we set the internal knots at 3, 6 and 9 years, and the boundary knots at -0 and 14.5 years:form_splines <- ~ value(log(serBilir)) * ns(year, k = c(3, 6, 9), B = c(0, 14.5)) jointFit2 <- update(jointFit1, functional_forms = form_splines, @@ -292,7 +291,7 @@
Non Proportional Hazards#> thinning: 1 #> time: 36 sec
The spline coefficients do not have a straightforward interpretation. -We therefore visualize the time-varying association of log serum +We, therefore, visualize the time-varying association of log serum bilirubin with the hazard of the composite event using the following piece of code:
-@@ -325,9 +324,9 @@Non Proportional Hazards#> jointFit2 4341.405 6355.209 -3701.785 #> #> The criteria are calculated based on the marginal log-likelihood.
Both the WAIC and LPML indicate that
+jointFit1
is a -better model thanjointFit2
. The DIC has the same magnitude -for both models.The WAIC and LPML indicate that
jointFit1
is a better +model thanjointFit2
. The DIC has the same magnitude for +both models.
vignettes/Transformation_Functions.Rmd
Transformation_Functions.Rmd
mixed_model()
is:
fm <- mixed_model(hepatomegaly ~ year, data = pbc2, random = ~ year | id,
@@ -214,11 +214,11 @@ Simplified syntax#> burn-in per chain: 500
#> thinning: 1
#> time: 15 sec
In the output this is named as value(hepatomegaly)
to
+
In the output, this is named value(hepatomegaly)
to
denote that the current value functional form is used. That is, we
assume that the risk at a specific time \(t\) is associated with the value of the
linear predictor of the longitudinal outcome at the same time point
-\(t\). In this particular case, the
+\(t\). In this case, the
subject-specific linear predictor denotes the log odds of experiencing
hepatomegaly at time \(t\).
jointFit1
by now linking the expit transformation of the
linear predictor (i.e., \(\mbox{expit}(x) =
\exp(x) / \{1 + \exp(x)\}\)) with the risk of an event. This is
@@ -274,12 +274,11 @@ Other available functions to use in the definition of the
-functional_forms
argument are vexp()
for
-calculating the exponent, vlog()
for calculating the
-natural logarithm, and vsqrt()
for calculating the square
-root.
functional_forms
argument are vexp()
to
+calculate the exponent, vlog()
to calculate the natural
+logarithm, and vsqrt()
to calculate the square root.
If we want to include the time-varying slope of the transformed
linear predictor, we also have the Dexpit()
and
Dexp()
functions available. As an example, we extend
@@ -381,8 +380,9 @@
+
summary(jFit2)$Survival
#> Mean StDev
#> sexfemale -0.1647358 0.2732504
diff --git a/docs/index.html b/docs/index.html
index f794a747..89ff4b22 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -135,7 +135,7 @@
The package JMbayes2 fits joint models for longitudinal and time-to-event data. It can accommodate multiple longitudinal outcomes of different type (e.g., continuous, dichotomous, ordinal, counts), and assuming different distributions, i.e., Gaussian, Student’s-t, Gamma, Beta, unit Lindley, censored Normal, Binomial, Poisson, Negative Binomial, and Beta-Binomial. For the event time process, right, left and interval censored data can be handled, while competing risks and multi-sate processes are also covered.
+The package JMbayes2 fits joint models for longitudinal and time-to-event data. It can accommodate multiple longitudinal outcomes of different type (e.g., continuous, dichotomous, ordinal, counts), and assuming different distributions, i.e., Gaussian, Student’s-t, Gamma, Beta, unit Lindley, censored Normal, Binomial, Poisson, Negative Binomial, and Beta-Binomial. For the event time process, right, left and interval censored data can be handled, while competing risks, multi-sate and recurrent-event processes are also covered..
JMbayes2 fits joint models using Markov chain Monte Carlo algorithms implemented in C++. Besides the main modeling function, the package also provides a number of functions to summarize and visualize the results.
jm()
now allows for zero-correlations constraints in the covariance matrix of the random effects. When the mixed models provided in the Mixed_objects
argument have been fitted assuming a diagonal matrix for the random effects, this will also be assumed in the joint model (in previous versions, this was ignored). In addition, the new argument which_independent
can be used to specify which longitudinal outcomes are to be assumed independent.jm()
now allows for zero-correlations constraints in the covariance matrix of the random effects. When the mixed models provided in the Mixed_objects
argument have been fitted assuming a diagonal matrix for the random effects, this will also be assumed in the joint model (in previous versions, this was ignored). In addition, the new argument which_independent
can be used to specify which longitudinal outcomes are to be assumed independent.
A bug in the predict()
method causing low AUC values has been corrected.
The time-varying ROC and AUC now allow to correct for censoring in the interval Tstart
to Thoriz
using inverse probability of censoring weighting. The default remains model-based weights
Package: | JMbayes2 |
Type: | Package |
Version: | 0.5-0 |
Date: | 2024-04-15 |
License: | GPL (>=3) |
This package fits joint models for longitudinal and time-to-event data. It can accommodate multiple longitudinal outcomes of different type (e.g., continuous, dichotomous, ordinal, counts), and assuming different distributions, i.e., Gaussian, Student's-t, Gamma, Beta, unit Lindley, censored Normal, Binomial, Poisson, Negative Binomial, and Beta-Binomial. For the event time process, right, left and interval censored data can be handled, while competing risks and multi-sate processes are also covered.
+Package: | JMbayes2 |
Type: | Package |
Version: | 0.5-0 |
Date: | 2024-04-24 |
License: | GPL (>=3) |
This package fits joint models for longitudinal and time-to-event data. It can accommodate multiple longitudinal outcomes of different type (e.g., continuous, dichotomous, ordinal, counts), and assuming different distributions, i.e., Gaussian, Student's-t, Gamma, Beta, unit Lindley, censored Normal, Binomial, Poisson, Negative Binomial, and Beta-Binomial. For the event time process, right, left and interval censored data can be handled, while competing risks and multi-sate processes are also covered.
JMbayes2 fits joint models using Markov chain Monte Carlo algorithms implemented in C++. The package also offers several utility functions that can extract useful information from fitted joint models. The most important of those are included in the See also Section below.
diff --git a/docs/reference/Rplot005.png b/docs/reference/Rplot005.png index 399a2103..e9b8d820 100644 Binary files a/docs/reference/Rplot005.png and b/docs/reference/Rplot005.png differ diff --git a/docs/reference/Rplot006.png b/docs/reference/Rplot006.png index d6ed83c3..cacccea3 100644 Binary files a/docs/reference/Rplot006.png and b/docs/reference/Rplot006.png differ diff --git a/docs/reference/Rplot007.png b/docs/reference/Rplot007.png index aecd8621..81027cb8 100644 Binary files a/docs/reference/Rplot007.png and b/docs/reference/Rplot007.png differ diff --git a/docs/reference/Rplot008.png b/docs/reference/Rplot008.png index 77be071b..caa344b8 100644 Binary files a/docs/reference/Rplot008.png and b/docs/reference/Rplot008.png differ diff --git a/docs/reference/accuracy-1.png b/docs/reference/accuracy-1.png index 4ab302e8..3cddbb26 100644 Binary files a/docs/reference/accuracy-1.png and b/docs/reference/accuracy-1.png differ diff --git a/docs/reference/accuracy.html b/docs/reference/accuracy.html index 610a0846..a34254a3 100644 --- a/docs/reference/accuracy.html +++ b/docs/reference/accuracy.html @@ -105,13 +105,13 @@the seed used in the sampling procedures. Defaults to 123.
MALA
a logical
; if TRUE, the MALA algorithm is used when updating the elements
+
logical
; if TRUE
, the MALA algorithm is used when updating the elements
of the Cholesky factor of the D matrix. Defaults to FALSE
.
save_random_effects
a logical
; if TRUE, the full MCMC results of the random
+
logical
; if TRUE
, the full MCMC results of the random
effects will be saved and returned with the jm
object. Defaults to FALSE
.
save_logLik_contributions
logical
; if TRUE
, the log-likelihood contributions are
+ saved in the mcmc
component of the jm
object. Defaults to FALSE
cores
an integer specifying the number of cores to use for running the chains in
parallel; no point of setting this greater than n_chains
.
parallel
a character string indicating how the parallel sampling of the chains will
+ be performed. Options are "snow"
(default) and multicore
.
knots
a numeric vector with the position of the knots for the B-spline approximation of the log baseline hazard function.