1 Introduction

In this vignettes, we present a case study we encountered in the simulation study for package multilevelcoda. Briefly, we found that multilevel model with compositional predictors with large sample size, large between-person heterogeneity and small within-person heterogeneity (large \(\sigma^2_{u}\) and small \(\sigma^2_{\varepsilon}\)) produced low bulk effective sample size (ESS). Here, we examined the diagnostics of the model of interest and explored different methods to improve the within-chain autocorrelation.

2 Generating Data from Simulation Study

We first generated a dataset consisting of a 3-part behaviour composition with 1200 individuals, 14 observations per individuals, and large random intercept variation (\(\sigma^2_{u} = 1.5\)), coupled with small residual variation (\(\sigma^2_{\varepsilon}: 0.5\)).

set.seed(1) 
sampled_cond <- cond[condition == "RElarge_RESsmall" & n_parts == 3 & N == 1200 & K == 14][1]
i <- 1

# condition
N             <- sampled_cond[i, N]
K             <- sampled_cond[i, K]
rint_sd       <- sampled_cond[i, rint_sd]
res_sd        <- sampled_cond[i, res_sd]
run           <- sampled_cond[i, run]
n_parts       <- sampled_cond[i, n_parts]
sbp_n         <- sampled_cond[i, sbp]
prefit_n      <- sampled_cond[i, prefit]
groundtruth_n <- sampled_cond[i, groundtruth]
parts         <- sampled_cond[i, parts]

# inputs
sbp           <- meanscovs[[paste(sbp_n)]]
prefit        <- get(prefit_n)
groundtruth   <- get(groundtruth_n)
parts         <- as.vector(strsplit(parts, " ")[[1]])

simd <- with(meanscovs, rbind(
  simulateData(
    bm = BMeans,
    wm = WMeans,
    bcov = BCov,
    wcov = WCov,
    n = N,
    k = K,
    psi = psi)
))

simd[, Sleep := TST + WAKE]
simd[, PA := MVPA + LPA]
# ILR ---------------------------------------------------
cilr <- compilr(
  data = simd,
  sbp = sbp,
  parts = parts,
  idvar = "ID")

tmp <- cbind(cilr$data,
             cilr$BetweenILR,
             cilr$WithinILR,
             cilr$TotalILR)

# random effects ----------------------------------------
redat <- data.table(ID = unique(tmp$ID),
                    rint = rnorm(
                      n = length(unique(tmp$ID)),
                      mean = 0,
                      sd = rint_sd))

tmp <- merge(tmp, redat, by = "ID")

# outcome -----------------------------------------------
if (n_parts == 3) {
  tmp[, sleepy :=  rnorm(
    n = nrow(simd),
    mean = groundtruth$b_Intercept  + rint +
      (groundtruth$b_bilr1 * bilr1) +
      (groundtruth$b_bilr2 * bilr2) +
      (groundtruth$b_wilr1 * wilr1) +
      (groundtruth$b_wilr2 * wilr2),
    sd = res_sd)]
}           
simd$sleepy <- tmp$sleepy

cilr <- compilr(simd, sbp, parts, total = 1440, idvar = "ID")
dat <- cbind(cilr$data, cilr$BetweenILR, cilr$WithinILR)

Here is the dataset dat, along with our variables of interest.

knitr::kable(head(dat[, .(ID, sleepy, Sleep, PA, SB,
                          bilr1, bilr2, wilr1, wilr2)]))
ID sleepy Sleep PA SB bilr1 bilr2 wilr1 wilr2
1 2.91 437 267.9 735 0.466 -1.16 -0.478 0.447
1 1.02 665 116.2 659 0.466 -1.16 0.250 -0.067
1 1.89 526 209.8 704 0.466 -1.16