############################################## # Analysis of Longitudinal Data (SuSe 2017) # # # # Solution for lab session 4 # # Datum: 20.06.2017 # ############################################## ############# # Exercise 1# ############# #a) # --> When we test the requirement of random effects, we test if their variance is equal or larger zero. # Thus, we sometimes speak of testing the random effects and sometimes testing the variance structure. #b) # load package nlme library(nlme) # load data from homepage # the data are already in a groupedData format load("../antibiotics.Rdata") head(antibiotics) str(antibiotics) # getGroups(antibiotics) # estimate random intercept model m_RI <- lme(y ~ time,random = ~1|child,data=antibiotics) summary(m_RI) # add a random slope for each child m_RIRS<-update(m_RI,random=~time|child) summary(m_RIRS) #c) # Likelihood-Ratio test (RI_vs_RIRS<-anova(m_RI,m_RIRS)) #(i) # --> Null hypothesis: $H_0: d_{12}= d_{22}=0$. #(ii) Decision # --> H_0 can not be rejected to an alpha niveau of 0.05. #(iii) Why problematic? # --> We test a variance parameter which is restricted with a lower boarder of 0. # Under certain regularity assumptions (among others that the null hypotheses does not # lie on the boundary of the parameter space), the test statistics is asymptotically Chi^2 # distributed with the degrees of freedom corresponding to the difference of the dimensions of the # subspace of the parameter space (more in the lab session). #d) # get mixture distribution chisqmix <- 0.5*pchisq(RI_vs_RIRS$L.Ratio[2],df=1) + 0.5*pchisq(RI_vs_RIRS$L.Ratio[2],df=2) # corresponding p-value: (pchisqmix <- 1 - chisqmix) # alternative computation 0.5*(1-pchisq(RI_vs_RIRS$L.Ratio[2],df=1)) + 0.5*(1-pchisq(RI_vs_RIRS$L.Ratio[2],df=2)) # --> We can thus reject H_0 to an alpha niveau of 0.05. The p-value is smaller than in the conservative test in c) # and leads to a different decision. #e) # --> As the fixed effects stay the same for the two hypotheses, we do not run into trouble using REML. #f) #(i) # --> Null hypothesis: $H_0: d_{12}=d_{21}=0$. #(ii) # model with uncorrelated random effects m_RIRS_unc<-update(m_RIRS,random = pdDiag(~1+time)) summary(m_RIRS_unc) anova(m_RIRS,m_RIRS_unc) # --> The null hypothesis cannot be rejected to an alpha niveau of 0.05 as the p-value is 0.87. # That is, The model can be simplified and the random intercept # and random slope can be modeled as independent. #(iii) # --> Yes, the asymptotic distribution is a $\chi^2$ distribution with the degrees of freedom # corresponding to the difference of the dimensions of the subspace of the parameter space, # because the null hypothesis does not lie on the border of the parameter space. ############# # Exercise 2# ############# #a) # load package mgcv library(mgcv) ?gamm # -> gamm objects constist of two parts, a gam part and an lme part. # For more detaisl, see ?gam and ?lme. #b) # estimate model with uncorrelated random intercepts and random slopes for each child and a smooth effect of time. m_RIRS_gamm<-gamm(y~s(time),random=list(child=pdDiag(~1+time)),data=antibiotics) # Note: defaults of s() are used here, for more details, see ?s. summary(m_RIRS_gamm$lme) summary(m_RIRS_gamm$gam) #c) # plot effect of time plot(m_RIRS_gamm$gam) # --> linear effect, no smooth effect needed here. We could test that! #d) covariance matrix of random effects summary(m_RIRS_gamm$lme) # --> The random effects are independent (as specified in the model formula). The covariance matrix can thus # be extracted from the summary and has the form: # |(2.892095)^2 0 | # |0 (0.006607523)^2 | ############# # Exercise 3# ############# #a) # --> Conditional independence. For models with a random intercept only, this also implies compound symmetry. #b) # --> In corStruct, no additional iid error is assumed: \Sigma_i= \tau^2H_i #c) # --> Compare lab session, tutorial and lecture slides. #d) # --> The marginal correlation Corr(Y_{ij},Y_{ik})=(d^2 + \tau^2 g(|t_{ij}-t_{ik}|))/(d^2 + \sigma^2 + \tau^2), compare # lecture slides. #e) #(i) # --> Empirical Semi-Variogram, more details see lecture slides, lab session, and tutorial. #(ii) library(nlme) # load data load("../Vorher_online/rats.long.RData") # transform covariate group in factor variable str(rats.long) # transform time rats.long$logT <- log(1 + (rats.long$TIME - 50) / 10) # note: now day 50 as first measurement taken at age 50 and here +1 # take out NAs rats.long <- rats.long[complete.cases(rats.long),] # estimate linear model under independence assumption lm1 <- lm(RESPONSE ~ GROUP * logT - GROUP, data = rats.long) # get residuals r = resid(lm1) # get transformed time t = rats.long$logT # get ids id = rats.long$SUBJECT # half squared residuals for all observations half.squared<- outer(r, r, FUN = function(r1, r2) (r1 - r2)^2/2) # absolute distances for all observations distances <- outer(t, t, FUN = function(t1, t2) abs(t1 - t2)) # get index matrix specifying if same subject samei <- outer(id, id, FUN = function(i1, i2) (i1 == i2)) # matrix with 0 on the diagonal and 1 for non-diagonal values as 0 on diagonal nondiag <- 1 - diag(length(r)) u <- as.vector(distances[samei & nondiag]) v <- as.vector(half.squared[samei & nondiag]) # total variance: \sigma^2 + d^2+ \tau^2 totvar <- mean(half.squared[!samei]) # smooth smooth_mean <- lowess(u,v,iter=0) # note that iter=0 such that the not the median is approximated # plot empirical semi-variogram par(mar=c(5.1, 5.1, 4.1, 2.1)) plot(u, v, pch = 16, cex = 0.5, col = 8,ylim=c(0,totvar),xlim=c(0,2.005), xlab="distance of logT (u)",ylab=expression(paste(hat("v"),"(u)")),main="Empirical Semi-Variogram") lines(smooth_mean, lwd = 2,t="b") abline(h = totvar, col = 2, lwd = 2, lty = 2) # add axes axis(2,at=1.37,labels=expression(paste(hat(sigma)^2)),lwd=2,col.ticks="red") axis(4,at=4.71,labels=expression(paste(hat(sigma)^2,"+",hat(d)^2,"+",hat(tau)^2)),lwd=2,col.ticks="red") abline(h=1.37,lty=2) #(iii) Conclusion # --> We can conclude that no serial correlation needs to be included in the model. The empiric semi-variogram # gives us a starting value for the error variance. # estimate model with random intercept without serial correlation for comparison lme1 <- lme(RESPONSE ~ GROUP * logT - GROUP,random=~1|SUBJECT, data = rats.long) # extract d^2 d2<-getVarCov(lme1)[1] # extract error variance sigma2<-lme1$sigma # compare sigma2 with that extracted from the empirical semi-variogram sigma2 # compare totvar and d2 and sigma2 d2+sigma2 # fitted variogram # --> \hat{v}(u) = sigma2