############################################## # Analysis of Longitudinal Data (SuSe 2017) # # # # Solution for tutorial 6 # # Datum: 18.07.2017 # ############################################## ############# # Exercise 1# ############# # a) # load package (for data) library(drm) # load data data(madras) # first overview ?madras head(madras) str(madras) # b) # --> See tutorial and lecture slides. # c) # --> The likelihood is a product of N q-dimensional integrals over the random effects. # These integrals can usually not be solved analytically and therefore numerical approximations have to be used. # The estimation is then performed iteratively. # d) # --> PQL: approximation of the data # --> Laplace approximation: approximation of the integrand # --> Gaussian quadratur: approximation of the integral # e) # Estimate logistic GLMM using all three approximations # approximation of the data (PQL): using function glmmPQL in package MASS library(MASS) ?glmmPQL glmm_RI_PQL<-glmmPQL(fixed=symptom~month+sex,random=~1|id,family=binomial,data=madras) summary(glmm_RI_PQL) # approximation of the integrand (Laplace): using function glmer in package lme4 library(lme4) ?glmer system.time(glmm_RI_LA<-glmer(symptom~month+sex+ (1|id),family=binomial(),data=madras)) # note: default is nAGQ=1 which correponds to Laplace approximation summary(glmm_RI_LA) # approximation of the integral (Gauss-Hermite):using function glmer in package lme4 system.time(glmm_RI_AGQ<-glmer(symptom~month+sex+ (1|id),family=binomial(),data=madras,nAGQ=11)) summary(glmm_RI_AGQ) # compare estimates of fixed effects # consider exp(coef) as logit link exp(fixef(glmm_RI_PQL)) # intercept: 4.75, month: 0.60, sex: 0.36 exp(fixef(glmm_RI_LA)) # intercept: 5.17, month: 0.58, sex: 0.31 exp(fixef(glmm_RI_AGQ)) # intercept: 5.17, month: 0.58, sex: 0.31 # --> Laplace and AGQ lead to very similar estimates of the fixed effects. The estimates for PQL # are slightly different, but also similar. # f) # --> PQL uses an approximate likelihood and is exact only for LMMs. It gives better approximations for # larger n_i and for Y_{ij} closer to normal. # For binary data, we have to be especially cautious as PQL approximation can # be seriously biased if the numbers of observations per subject are small. # Here, we have 12 measurements per subject which should be fine but we have a high number # of missings for some subjects and thus 12 is only the maximum number. # g) # Interpretation (of AGQ estimation) # Intercept: exp(Intercept) = 5.17 # --> The odds of ``thought disorder'' for males (reference category) with random effect 0 # would be 5.17 at the first time point (month 0), but for each subject they differ due to the random intercept # Month: exp(month) = 0.58 # --> Per month under treatment, the odds of ``thought disorder'' FOR A GIVEN PATIENT are almost halved # (ceteris paribus, i.e. here same sex). Or: are 0.58 times higher (0.42 times lower). # See tutorial why: exp() / exp() -> cancel out, when b_i the same # Sex: exp(sex) = 0.31 # --> The odds of ``thought disorder'' for a female is 0.31 times of those, the same person would have if she was male # (ceteris paribus, i.e. here at the same time point / month, with the same random effect ). # (i) # --> As generally the link function is not linear, the relation # E(Y_{ij}) = h(x_{ij}^\top \beta) which holds for the LMM does not hold for the GLMM anymore. # Thus, we cannot interpret the parameters on population level but only on subject level. # (ii) # plot of individual probability curves # extract fixed effects intercept<-fixef(glmm_RI_AGQ)[["(Intercept)"]] beta1<-fixef(glmm_RI_AGQ)[["month"]] beta2<-fixef(glmm_RI_AGQ)[["sex"]] subjects<-1:length(unique(madras$id)) # subjects in the data set madras$id<-factor(rep(subjects,each=12)) # Save id as factor variable # fixed part of the predictor predictor_fixed<-matrix( intercept + beta1*madras$month + beta2*madras$sex , nrow=length(subjects),ncol=12, byrow = TRUE) # predictor (fixed and random part) predictor<-predictor_fixed+ ranef(glmm_RI_AGQ)$id$'(Intercept)' # individual probabilities p_indiv <- exp(predictor)/(1+exp(predictor)) # which patients are male, which are female male<-as.numeric(unique(subset(madras,sex==0)$id)) female<-subjects[-male] p_indiv_male<-p_indiv[male,] p_indiv_female<-p_indiv[female,] par(mfrow=c(1,2)) ######### # male ######### # individual matplot(0:11,t(p_indiv_male),t="l",col="green",main="male") ######### # female ######### # individual matplot(0:11,t(p_indiv_female),t="l",col="green",main="female") # (iii) par(mfrow=c(1,2)) ######### # male ######### # individual matplot(0:11,t(p_indiv_male),t="l",col="green",main="male") # with b_i=0 (probability for average male patient) p_0_male <- exp(predictor_fixed[male,])/(1+exp(predictor_fixed[male,])) lines(0:11,p_0_male[1,],col="red",lwd=2) ######### # female ######### # individual matplot(0:11,t(p_indiv_female),t="l",col="green",main="female") # with b_i=0 (probability for average female patient) p_0_female <- exp(predictor_fixed[female,])/(1+exp(predictor_fixed[female,])) lines(0:11,p_0_female[1,],col="red",lwd=2) # (iv) par(mfrow=c(1,2)) ######### # male ######### # individual matplot(0:11,t(p_indiv_male),t="l",col="green",main="male") # with b_i=0 (probability for average male patient) p_0_male <- exp(predictor_fixed[male,])/(1+exp(predictor_fixed[male,])) lines(0:11,p_0_male[1,],col="red",lwd=2) # with average (average probability in population of male patients) p_average_male <- colMeans(p_indiv_male) lines(0:11,p_average_male,col="blue",lwd=2) legend(x="topright",legend=c(expression(paste("P(Y"[ij], " = 1 | b"[i],")")), expression(paste("P(Y"[ij], " = 1 | b"[i], " = 0)")), expression(paste("P(Y"[ij], " = 1 )"))), col=c("green","red","blue"),lwd=c(1,3,3)) ######### # female ######### # individual matplot(0:11,t(p_indiv_female),t="l",col="green",main="female") # with b_i=0 (probability for average female patient) p_0_female <- exp(predictor_fixed[female,])/(1+exp(predictor_fixed[female,])) lines(0:11,p_0_female[1,],col="red",lwd=2) # with average (average probability in population of female patients) p_average_female <- colMeans(p_indiv_female) lines(0:11,p_average_female,col="blue",lwd=2) legend(x="topright",legend=c(expression(paste("P(Y"[ij], " = 1 | b"[i],")")), expression(paste("P(Y"[ij], " = 1 | b"[i], " = 0)")), expression(paste("P(Y"[ij], " = 1 )"))), col=c("green","red","blue"),lwd=c(1,3,3)) # Interpretation # --> Demonstration that the parameters of the GLMM cannot be interpretated marginally (on population level). # Plot is done separately for women and men as we can only make a plot like this for a single continuous covariate. # Compare also lecture slides. # h) # (i) # --> As the PQL estimation is based on the likelihood of pseudo data, no likelihood ratio test is possible. # (ii) # --> For the same reason as in (a). A quasi likelihood is used and only the first two conditional moments E(Y_{ij}|b_i) and # Var(Y_{ij}|b_i) are specified (plus penalty term for the random effects).