##############################################
# Analysis of Longitudinal Data (SuSe 2017)  #
#                                            #
# Solution for tutorial 5                    #
# Date: 04.07.2017                           #
##############################################

#############
# Exercise 1#
#############

#a)
# load dataset vitamin
load("../Vorher_Online/vitamin.Rdata")
# get first overview
str(vitamin)

#b)
# load package nlme
library(nlme)
# estimate linear mixed model with random intercepts and random slopes
# and with a fixed effect for time and for the interaction of time and group
m_RIRS<-lme(y~time*group-group,random=~time|child,data=vitamin)
summary(m_RIRS)

#c)
# --> We do not need a fixed main effect for group, as group is randomized.
#     Thus, at the first time point, before the vitamin/placebo is given,
#     there should be no effect of group.

#d)
# --> The plot can show misspecifications of the mean structure and can indicate missing random slopes.
#     There should be no systematic trend.

#e)
# extract population-specific residuals
vitamin$residuals_RIRS<-resid(m_RIRS,level=0)
# extract predictions on population level = estimated mean
vitamin$preds_RIRS<-predict(m_RIRS,level=0)
# plot population-specific residuals against estimated mean
library(ggplot2)
ggplot(vitamin, aes(preds_RIRS, residuals_RIRS)) + geom_point() +
  geom_smooth(aes(col = child), alpha = .3, method = "loess", se = FALSE) +
  geom_smooth(method = "loess", se = FALSE, size = 3, col = "black") +
  geom_hline(yintercept = 0, lty = "dashed")
# --> There is a systematic (quadratic) trend indicating a misspecification of the mean structure.

#h)
# --> The population-specific residuals have zero mean, but they are correlated and heteroscedastic,
#     so one has to be careful with the diagnosis.
# --> Better: residuals transformed with the Cholesky transformation (compare lecture slides 7 and the tutorial);
#     a small illustrative sketch follows below, after Exercise 2 c).

########################################################################################################

#############
# Exercise 2#
#############

#a)
# transform time to log(time + 1)
vitamin$ltime<-log(vitamin$time+1)
# now estimate model using ltime with ML (alternatively use update)
m_RIRSlog<-lme(y~ltime*group-group,random=~ltime|child,data=vitamin,method="ML")
# re-estimate model using time with ML (alternatively use update)
m_RIRS<-lme(y~time*group-group,random=~time|child,data=vitamin,method="ML")

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Briefly check if the model diagnosis is better using ltime:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# extract population-specific residuals
vitamin$residuals_RIRSlog<-resid(m_RIRSlog,level=0)
# extract predictions on population level = estimated mean
vitamin$preds_RIRSlog<-predict(m_RIRSlog,level=0)
# plot population-specific residuals against estimated mean
library(ggplot2)
ggplot(vitamin, aes(preds_RIRSlog, residuals_RIRSlog)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE, size = 2) +
  geom_hline(yintercept = 0, lty = "dashed")

#b)
# --> The models are NOT nested and thus a LR test is not appropriate.
#     We can compare the two models using their marginal AICs (mAICs), as they were estimated using ML
#     and because we are comparing the fixed effects.
#     The smaller the mAIC, the better.
anova(m_RIRS,m_RIRSlog)
# --> The model using ltime is better as it has the smaller mAIC (this agrees with the residual diagnostics).

#c)
# --> The REML likelihoods of models with different fixed effects are not comparable
#     (see lecture slides 7 and compare testing fixed effects).
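
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Illustrative sketch for 1 h): Cholesky-transformed residuals
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This is only a minimal sketch, not part of the original solution code; the helper objects
# mu_marg and trans_res are introduced here for illustration. For child i with random-effects
# design Z_i = (1, time_i), the estimated marginal covariance is V_i = Z_i D Z_i' + sigma^2 I
# (no additional correlation/weights structure in this model). Premultiplying the marginal
# residuals y_i - X_i beta_hat by the inverse lower-triangular Cholesky factor of V_i should
# give approximately uncorrelated, homoscedastic residuals that are easier to diagnose.
vitamin$mu_marg<-predict(m_RIRS,level=0)      # estimated marginal mean X_i beta_hat (helper column)
D<-as.matrix(getVarCov(m_RIRS))               # estimated covariance of (random intercept, random slope)
sigma2<-m_RIRS$sigma^2                        # estimated error variance
trans_res<-do.call(rbind,lapply(split(vitamin,vitamin$child),function(d){
  Zi<-cbind(1,d$time)                             # random-effects design of this child
  Vi<-Zi%*%D%*%t(Zi)+sigma2*diag(nrow(d))         # estimated marginal covariance V_i
  data.frame(pred=d$mu_marg,
             tres=drop(solve(t(chol(Vi)),d$y-d$mu_marg)))   # L_i^{-1}(y_i - X_i beta_hat)
}))
ggplot(trans_res, aes(pred, tres)) + geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  geom_hline(yintercept = 0, lty = "dashed")
# If the model is adequately specified, these transformed residuals should scatter around zero
# without a systematic trend.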
#d)
# --> For the marginal AIC, we assume that two independent replications come from the same marginal distribution,
#     but do not share the same random effects.
#     Thus, the marginal AIC is appropriate when the focus is on the population-level fixed effects.
# --> For the conditional AIC, we assume that two independent replications come from the same conditional distribution,
#     and share the same random effects.
#     Thus, the conditional AIC is appropriate when the focus is on the subjects or random effects.

#e)
# estimate random-intercept-only model using ltime with ML
m_RIlog<-lme(y~ltime*group-group,random=~1|child,data=vitamin,method="ML")
# compare the two mAICs
anova(m_RIRSlog,m_RIlog)
# check that the marginal AIC is used in anova(), although we once have a random intercept and slope
# and once a random intercept only
-2*anova(m_RIRSlog,m_RIlog)$logLik[1]+2*anova(m_RIRSlog,m_RIlog)$df[1]
-2*anova(m_RIRSlog,m_RIlog)$logLik[2]+2*anova(m_RIRSlog,m_RIlog)$df[2]
# --> Here, the simpler model (without random slopes) is preferred.
# Note: There is a bias which induces a preference for models with fewer random effects (compare lecture slides 7).

#f)
# load packages
library(lme4)
library(cAIC4)
# fit the models with and without random slopes using lmer
m_RIRSlog_lmer<-lmer(y~ltime*group-group+(ltime|child),data=vitamin)
m_RIlog_lmer<-lmer(y~ltime*group-group+(1|child),data=vitamin)
# compute cAIC of the model with random slopes
cAIC(m_RIRSlog_lmer)$caic
# compute cAIC of the model without random slopes
cAIC(m_RIlog_lmer)$caic
# --> The model with random slopes is preferred; thus the cAIC yields the opposite decision to the mAIC in e).

########################################################################################################

#############
# Exercise 3#
#############

#a) Your turn!

#b)
# load package MASS
library(MASS)
# data epil
?epil

#(i)
# --> A generalized linear mixed model (GLMM) with a Poisson distribution would be appropriate,
#     as we are interested in subject-specific effects.

#(ii)
# --> A marginal model (estimated using GEE) would be appropriate, as we are interested in population effects.

#(iii)
# --> In general (for arbitrary link functions), the effect in the GLMM has to be interpreted conditional on the
#     random effects and is thus a subject-specific parameter, not a population parameter.
#     The correct interpretation is: "... FOR A GIVEN PATIENT with this treatment ...".
# --> In general, the interpretation is therefore not the same as in the marginal model, where we get a
#     population parameter ("on average").
# Note, however, that there are some exceptions:
# 1. For the identity link (LMM), the interpretation in the LMM and in the corresponding marginal model is the same.
# 2. For the log link (e.g. count data), all fixed effects for which there is no corresponding random effect can be
#    interpreted marginally, e.g. in a random intercept model with log link, all fixed effects except the intercept
#    can be interpreted marginally.
# (see slides 10 and Diggle et al., 2002, p. 137)
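
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Illustrative sketch for (i) and (ii): one possible way to fit both models
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This is only a sketch, not the official solution: the covariate choice (lbase, trt, lage, V4,
# as used on the ?epil help page, without interactions), the object names glmm_epil/gee_epil,
# and the use of the geepack package are assumptions made here for illustration.
# (i) subject-specific model: Poisson GLMM with a random intercept per subject (lme4 loaded above)
glmm_epil<-glmer(y~lbase+trt+lage+V4+(1|subject),family=poisson,data=epil)
summary(glmm_epil)
# (ii) population-averaged model: Poisson GEE with an exchangeable working correlation
library(geepack)
gee_epil<-geeglm(y~lbase+trt+lage+V4,id=subject,family=poisson,data=epil,corstr="exchangeable")
summary(gee_epil)
# cf. (iii): with the log link and only a random intercept, the fixed effects other than the
# intercept (e.g. the treatment effect) can also be read marginally, whereas the GLMM intercept
# remains subject-specific.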