################################################################################ # Exercise 1 # GLMM ################################################################################ library(RColorBrewer) palette(brewer.pal(9,'Set1')) library(ggplot2) ###### (1a) prepare data load("madras.Rdata") str(madras) # # cumulative number of attacks per person # madras$count <- unlist(tapply(madras$y,madras$id,cumsum)) # madrasC: only keep patients with all 12 observations completeId <- names(table(madras$id)[table(madras$id)==12]) madrasC <- subset(madras, madras$id %in% completeId) madrasC$id <- factor(madrasC$id) # drop unused levels # control table(madrasC$id) ###### (1b) plot data ## possible solution: # calculate relative frequency of attacks over time for males and femals yMatMales <- matrix(subset(madrasC$y, madrasC$gender=="Male"), nrow=12) relFMales <- apply(yMatMales, 1, mean) yMatFemales <- matrix(subset(madrasC$y, madrasC$gender=="Female"), nrow=12) relFFemales <- apply(yMatFemales, 1, mean) # add random noise to response and to month before plotting madrasC$yN <- madrasC$y + runif(length(madrasC$y), -0.05, 0.05) madrasC$monthN <- madrasC$month + runif(length(madrasC$month), -0.2, 0.2) #pdf("madrasCdata.pdf", height=6, width=6) with(madrasC, plot(yN ~ monthN, col=gender, main="rel. H?ufigkeit der Sch?be ?ber die Zeit", ylab="y", xlab="month")) m <- 0:11 lines(m, predict(loess(relFMales~m)), col=1, lwd=2) lines(m, predict(loess(relFFemales~m)), col=2, lwd=2) #dev.off() ###### (1c) model with random intercept library(lme4) ?glmer m1 <- glmer(y ~ month + gender + (1|id), family=binomial(), data=madrasC) summary(m1) fixef(m1) exp(fixef(m1)) #ranef(m1) summary(m1)$varcor ###### (1d) odds ratio for sex ### conditional odds ratio of sex (individual parameter) exp(fixef(m1)[3]) #### two different solution to approximate the marginal odds ratio ## Solution 1: simulate data from the model to estimate the conditional expectations ## Solution 2: use the predicted b_i from the data to estimate the conditional expectations ################### Solution 1 by Lisa Steyer and Tore Erdmann #### approximate conditional expectation by simulating from the model ## E_b(P(y=1|x,b)) pr_y1 <- function(x, beta, b) { mean(exp(x %*% beta + b) / (1 + exp(x %*% beta + b))) } ## E_b(P(y=0|x,b)) pr_y0 <- function(x, beta, b) { mean( 1 / (1 + exp(x %*% beta + b))) } # get estimated sd of random intercepts tau <- sqrt(summary(m1)$varcor$id[1]) # ## estimate sd of random intercepts, by using the predictions of the b_i s # ## smaller than direct estimate! (gives a result closer to Solution 2) # tau <- sd(ranef(m1)[[1]][[1]]) ## compute Odds ratios at monthi # n number of simulated observations from the model # fixef(m1): beta_0, beta_month, beta_sex # for men: x=c(1, monthti, 0) # for women: x=c(1, monthti, 1) f1 <- function(monthi, n = 10000) { (pr_y1(x = c(1, monthi, 1), beta = fixef(m1), b = rnorm(n = n, sd = tau)) / pr_y0(x = c(1, monthi, 1), beta = fixef(m1), b = rnorm(n = n, sd = tau)) ) / (pr_y1(x = c(1, monthi, 0), beta = fixef(m1), b = rnorm(n = n, sd = tau)) / pr_y0(x = c(1, monthi, 0), beta = fixef(m1), b = rnorm(n = n, sd = tau)) ) } # apply function f1() to the month 0:11 margORtime1 <- sapply(0:11, function(i) f1(i, n = 100000)) #plot(x = 0:11, y = margORtime1) ## mean overall odds ratio margOR1 <- mean(margORtime1) ################### Solution 2 ### approximate marginal odds ratio of sex (population parameter) # is the person female? female <- madrasC$gender[seq(1, by=12, l=length(table(madrasC$id)))]=="Female" # random effects females bFemale <- ranef(m1)$id$'(Intercept)'[female] # random effects males bMale <- ranef(m1)$id$'(Intercept)'[!female] # get the fixed effects time <- 0:11 beta0 <- fixef(m1)[1] beta1 <- fixef(m1)[2] beta2 <- fixef(m1)[3] # probabilities of attacks for femals depending on b and month # one woman per column, time in rows pFemale <- sapply(bFemale, function(b){exp(beta0 + beta1 * time + beta2 + b) / (1 + exp(beta0 + beta1 * time + beta2 + b))}) # probabilities of attacks for males depending on b and month # one man per column, time in rows pMale <- sapply(bMale, function(b){exp(beta0 + beta1 * time + b) / (1 + exp(beta0 + beta1 * time + b))}) # odds ratios for time=0,...,11 - expectation is appriximated by the mean over all b margORtime2 <- ( rowMeans(pFemale) /rowMeans(1-pFemale) ) / ( rowMeans(pMale) /rowMeans(1-pMale) ) # mean marginal odds ratio over all b and times = approximation of expectation margOR2 <- mean(( rowMeans(pFemale) /rowMeans(1-pFemale)) / ( rowMeans(pMale) /rowMeans(1-pMale))) ############################################## # compare with conditional odds ratio of sex exp(fixef(m1)[3]) margOR1 ## use Solution 1 margOR2 # marginal odds ratio of 0.37 closer to 1 than conditional odds ratio of 0.21 # this means that the marginal odds ratio is less extreme, # i.e. there is less effect on the population-level than on the person-level # see 1(h) for a graphical illustration # intuition: the larger the sd of the random effects, the bigger gets # the difference between conditional and marginal OR ############## plot marginal and conditional odds ratio for comparison plot(0:11, margORtime2, type="b", col=1) lines(x = 0:11, y = margORtime1, type="b", col=2) abline(h=exp(beta2), col=4, lwd=2) abline(h=mean(margORtime2), col=1, lwd=2) abline(h=mean(margORtime1), col=2, lwd=2) legend("topright", fill=c(c(1,2,4)), legend=c("marginal, predicted b_i", "marginal, simulated b_i", "conditional") ) ### comment: if prior interest is in population parameter: ### fit a generalized estimating equation model (GEE), R-package gee ############################################## # plot probabilities over time for males and females par(mfrow=c(1,2)) matplot(time, pFemale, type="l", main="Female", col=2) # probability for b=0 (black) lines(time, exp(beta0 + beta1 * time + beta2) / (1 + exp(beta0 + beta1 * time + beta2)), lwd=2) # mean probability (red) lines(time, apply(pFemale, 1, mean), col = 1, lwd = 2) legend("topright", legend=c("b=0","mean probability", "quntiles of b"), fill=c("black", 1, 2)) matplot(time, pMale, type="l", main="Male", col=2) lines(time, exp(beta0 + beta1 * time) / (1 + exp(beta0 + beta1 * time)), lwd=2) lines(time, apply(pMale, 1, mean), col = 1, lwd = 2) legend("topright", legend=c("b=0","mean probability", "quntiles of b"), fill=c("black", 1, 2)) ###### (1e) model with random intercept # and additional interaction of month and gender m2 <- update(m1, .~. + month:gender) summary(m2) ###### (1f) models with random intercept and random slope # without correlation of random effects m3 <- update(m2, .~. + (-1 + month|id)) summary(m3) # with correlation of random effects m4 <- update(m2, .~. - (1|id) + (month|id)) summary(m4) ###### (1g) plot results of models par(mfrow=c(2,2)) #pdf("madrasCfit1.pdf", height=6, width=6) with(madrasC, plot(yN ~ monthN, col=gender, main=bquote(paste("Modell 1: ", y[it]," & ",hat(p)[it], " (c) rand. int.")), ylab="y", xlab="month")) fitted1mat <- matrix(fitted(m1), nrow=12) matplot(0:11, fitted1mat, type="l", lty=1, col=madrasC$gender[seq(1,by=12,l=69)], add=TRUE) #dev.off() #pdf("madrasCfit2.pdf", height=6, width=6) with(madrasC, plot(yN ~ monthN, col=gender, main=bquote(paste("Modell 2: ", y[it]," & ",hat(p)[it], " (e) rand. int., sex:month")), ylab="y", xlab="month")) fitted2mat <- matrix(fitted(m2), nrow=12) matplot(0:11, fitted2mat, type="l", lty=1, col=madrasC$gender[seq(1,by=12,l=69)], add=TRUE) #dev.off() #pdf("madrasCfit3.pdf", height=6, width=6) with(madrasC, plot(yN ~ monthN, col=gender, main=bquote(paste("Modell 3: ", y[it]," & ",hat(p)[it], " (f) rand. int.+slope, sex:month")), ylab="y", xlab="month")) fitted3mat <- matrix(fitted(m3), nrow=12) matplot(0:11, fitted3mat, type="l", lty=1, col=madrasC$gender[seq(1,by=12,l=69)], add=TRUE) #dev.off() #pdf("madrasCfit4.pdf", height=6, width=6) with(madrasC, plot(yN ~ monthN, col=gender, main=bquote(paste("Modell 4: ", y[it]," & ",hat(p)[it], " (f) rand. int.+slope corr., sex:month")), ylab="y", xlab="month")) fitted4mat <- matrix(fitted(m4), nrow=12) matplot(0:11, fitted4mat, type="l", lty=1, col=madrasC$gender[seq(1,by=12,l=69)], add=TRUE) #dev.off() ###### (1h) SUPPLEMENT # generate plot for interpretation of coefficients # look at random effects of m1 quantile(exp(ranef(m1)$id$'(Intercept)'), p=c(.2,.4,.6,.8)) summary(m1) beta0 <- fixef(m1)[1] beta1 <- fixef(m1)[2] # quantiles of "exp(ranef)" expquants <- quantile(exp(ranef(m1)$id$'(Intercept)'), p=seq(0.1, 0.9, 0.05)) time <- 0:11 # probability depending on quantile of b indv <- sapply(expquants, function(q){q * exp(beta0 + beta1 * time) / (1 + q * exp(beta0 + beta1 * time))}) #pdf("interpretationbeta.pdf") par(mfrow=c(1,1)) plot(time, exp(beta0 + beta1 * time) / (1 + exp(beta0 + beta1 * time)), type ='l', ylim = c(0:1), lwd = 2, ylab = 'P(y = 1 | x, beta, b)', xlab = 'month') for (q in 1:length(expquants)){ lines(time, indv[, q], type ='l', col = 2) } lines(time, apply(indv, 1, mean), col = 1, lwd = 2) #legend("topright", fill=c(2, "black", 1), legend=c("P over quantiles of b", "cond. effect", "marg. effect")) #dev.off()