################################################################################
# Exercise 1
# GLMM 
################################################################################


library(RColorBrewer)
palette(brewer.pal(9,'Set1'))
library(ggplot2)


###### (1a) prepare data

load("madras.Rdata")
str(madras)

# # cumulative number of attacks per person
# madras$count <- unlist(tapply(madras$y,madras$id,cumsum))

# madrasC: only keep patients with all 12 observations 
completeId <- names(table(madras$id)[table(madras$id)==12])
madrasC <- subset(madras, madras$id %in% completeId)
madrasC$id <- factor(madrasC$id) # drop unused levels

# control
table(madrasC$id)


###### (1b) plot data

## possible solution:
# calculate relative frequency of attacks over time for males and femals
yMatMales <- matrix(subset(madrasC$y, madrasC$gender=="Male"), nrow=12)
relFMales <- apply(yMatMales, 1, mean)

yMatFemales <- matrix(subset(madrasC$y, madrasC$gender=="Female"), nrow=12)
relFFemales <- apply(yMatFemales, 1, mean)

# add random noise to response and to month before plotting
madrasC$yN <- madrasC$y + runif(length(madrasC$y), -0.05, 0.05)
madrasC$monthN <- madrasC$month + runif(length(madrasC$month), -0.2, 0.2)

#pdf("madrasCdata.pdf", height=6, width=6)
with(madrasC, plot(yN ~ monthN, col=gender,
                   main="rel. H?ufigkeit der Sch?be ?ber die Zeit",
                   ylab="y", xlab="month"))
m <- 0:11
lines(m, predict(loess(relFMales~m)), col=1, lwd=2)
lines(m, predict(loess(relFFemales~m)), col=2, lwd=2)
#dev.off()


###### (1c) model with random intercept
library(lme4)
?glmer

m1 <- glmer(y ~ month + gender + (1|id), family=binomial(), data=madrasC)
summary(m1)

fixef(m1)
exp(fixef(m1))
#ranef(m1)

summary(m1)$varcor

###### (1d) odds ratio for sex

### conditional odds ratio of sex (individual parameter)
exp(fixef(m1)[3])


#### two different solution to approximate the marginal odds ratio
## Solution 1: simulate data from the model to estimate the conditional expectations
## Solution 2: use the predicted b_i from the data to estimate the conditional expectations

################### Solution 1 by Lisa Steyer and Tore Erdmann
#### approximate conditional expectation by simulating from the model

## E_b(P(y=1|x,b))
pr_y1 <- function(x, beta, b) {
  mean(exp(x %*% beta + b) / (1 + exp(x %*% beta + b)))
}

## E_b(P(y=0|x,b))
pr_y0 <- function(x, beta, b) {
  mean( 1 / (1 + exp(x %*% beta + b)))
}

# get estimated sd of random intercepts
tau <- sqrt(summary(m1)$varcor$id[1])

# ## estimate sd of random intercepts, by using the predictions of the b_i s
# ## smaller than direct estimate! (gives a result closer to Solution 2)
# tau <- sd(ranef(m1)[[1]][[1]])

## compute Odds ratios at monthi
# n number of simulated observations from the model
# fixef(m1): beta_0, beta_month, beta_sex 
# for men: x=c(1, monthti, 0)
# for women: x=c(1, monthti, 1)
f1 <- function(monthi, n = 10000) {
  (pr_y1(x = c(1, monthi, 1), beta = fixef(m1), b = rnorm(n = n, sd = tau)) /
     pr_y0(x = c(1, monthi, 1), beta = fixef(m1), b = rnorm(n = n, sd = tau)) ) /
    (pr_y1(x = c(1, monthi, 0), beta = fixef(m1), b = rnorm(n = n, sd = tau)) /
       pr_y0(x = c(1, monthi, 0), beta = fixef(m1), b = rnorm(n = n, sd = tau)) )
}

# apply function f1() to the month 0:11 
margORtime1 <- sapply(0:11, function(i) f1(i, n = 100000))
#plot(x = 0:11, y = margORtime1)

## mean overall odds ratio
margOR1 <- mean(margORtime1)


################### Solution 2
### approximate marginal odds ratio of sex (population parameter)
# is the person female?
female <- madrasC$gender[seq(1, by=12, l=length(table(madrasC$id)))]=="Female"

# random effects females
bFemale <- ranef(m1)$id$'(Intercept)'[female]
# random effects males
bMale <- ranef(m1)$id$'(Intercept)'[!female]

# get the fixed effects
time <- 0:11
beta0 <- fixef(m1)[1]
beta1 <- fixef(m1)[2]
beta2 <- fixef(m1)[3]

# probabilities of attacks for femals depending on b and month
# one woman per column, time in rows
pFemale <- sapply(bFemale, function(b){exp(beta0 + beta1 * time + beta2 + b) / 
                                        (1 + exp(beta0 + beta1 * time + beta2 + b))})

# probabilities of attacks for males depending on b and month
# one man per column, time in rows
pMale <- sapply(bMale, function(b){exp(beta0 + beta1 * time + b) / 
                                      (1 + exp(beta0 + beta1 * time + b))})

# odds ratios for time=0,...,11 - expectation is appriximated by the mean over all b
margORtime2 <- ( rowMeans(pFemale) /rowMeans(1-pFemale) )  / ( rowMeans(pMale) /rowMeans(1-pMale) )

# mean marginal odds ratio over all b and times = approximation of expectation
margOR2 <- mean(( rowMeans(pFemale) /rowMeans(1-pFemale))  / ( rowMeans(pMale) /rowMeans(1-pMale)))


##############################################
# compare with conditional odds ratio of sex
exp(fixef(m1)[3])

margOR1  ## use Solution 1
margOR2

# marginal odds ratio of 0.37 closer to 1 than conditional odds ratio of 0.21
# this means that the marginal odds ratio is less extreme, 
# i.e. there is less effect on the population-level than on the person-level
# see 1(h) for a graphical illustration 

# intuition: the larger the sd of the random effects, the bigger gets
# the difference between conditional and marginal OR

############## plot marginal and conditional odds ratio for comparison
plot(0:11, margORtime2, type="b", col=1)
lines(x = 0:11, y = margORtime1, type="b", col=2)
abline(h=exp(beta2), col=4, lwd=2)
abline(h=mean(margORtime2), col=1, lwd=2)
abline(h=mean(margORtime1), col=2, lwd=2)
legend("topright", fill=c(c(1,2,4)), 
       legend=c("marginal, predicted b_i", "marginal, simulated b_i", "conditional") )


### comment: if prior interest is in population parameter: 
### fit a generalized estimating equation model (GEE), R-package gee


##############################################
# plot probabilities over time for males and females
par(mfrow=c(1,2))

matplot(time, pFemale, type="l", main="Female", col=2)
# probability for b=0 (black)
lines(time, exp(beta0 + beta1 * time + beta2) / 
        (1 + exp(beta0 + beta1 * time + beta2)), lwd=2)
# mean probability (red)
lines(time, apply(pFemale, 1, mean), col = 1, lwd = 2)
legend("topright", legend=c("b=0","mean probability", "quntiles of b"), 
       fill=c("black", 1, 2))

matplot(time, pMale, type="l", main="Male", col=2)
lines(time, exp(beta0 + beta1 * time) / 
        (1 + exp(beta0 + beta1 * time)), lwd=2)
lines(time, apply(pMale, 1, mean), col = 1, lwd = 2)
legend("topright", legend=c("b=0","mean probability", "quntiles of b"), 
       fill=c("black", 1, 2))


###### (1e) model with random intercept 
# and additional interaction of month and gender
m2 <- update(m1, .~. + month:gender)
summary(m2)


###### (1f) models with random intercept and random slope

# without correlation of random effects
m3 <- update(m2, .~. + (-1 + month|id))
summary(m3)

# with correlation of random effects
m4 <- update(m2, .~. - (1|id) + (month|id)) 
summary(m4)


###### (1g) plot results of models

par(mfrow=c(2,2))

#pdf("madrasCfit1.pdf", height=6, width=6)
with(madrasC, plot(yN ~ monthN, col=gender,
                   main=bquote(paste("Modell 1: ", y[it]," & ",hat(p)[it], " (c) rand. int.")),
                   ylab="y", xlab="month"))
fitted1mat <- matrix(fitted(m1), nrow=12)
matplot(0:11, fitted1mat, type="l", lty=1, col=madrasC$gender[seq(1,by=12,l=69)], add=TRUE)
#dev.off()


#pdf("madrasCfit2.pdf", height=6, width=6)
with(madrasC, plot(yN ~ monthN, col=gender,
                   main=bquote(paste("Modell 2: ", y[it]," & ",hat(p)[it], " (e) rand. int., sex:month")),
                   ylab="y", xlab="month"))
fitted2mat <- matrix(fitted(m2), nrow=12)
matplot(0:11, fitted2mat, type="l", lty=1, col=madrasC$gender[seq(1,by=12,l=69)], add=TRUE)
#dev.off()


#pdf("madrasCfit3.pdf", height=6, width=6)
with(madrasC, plot(yN ~ monthN, col=gender,
                   main=bquote(paste("Modell 3: ", y[it]," & ",hat(p)[it], " (f) rand. int.+slope, sex:month")),
                   ylab="y", xlab="month"))
fitted3mat <- matrix(fitted(m3), nrow=12)
matplot(0:11, fitted3mat, type="l", lty=1, col=madrasC$gender[seq(1,by=12,l=69)], add=TRUE)
#dev.off()


#pdf("madrasCfit4.pdf", height=6, width=6)
with(madrasC, plot(yN ~ monthN, col=gender,
                   main=bquote(paste("Modell 4: ", y[it]," & ",hat(p)[it], " (f) rand. int.+slope corr., sex:month")),
                   ylab="y", xlab="month"))
fitted4mat <- matrix(fitted(m4), nrow=12)
matplot(0:11, fitted4mat, type="l", lty=1, col=madrasC$gender[seq(1,by=12,l=69)], add=TRUE)
#dev.off()


###### (1h) SUPPLEMENT
# generate plot for interpretation of coefficients

# look at random effects of m1
quantile(exp(ranef(m1)$id$'(Intercept)'), p=c(.2,.4,.6,.8))
summary(m1)

beta0 <- fixef(m1)[1]
beta1 <- fixef(m1)[2]

# quantiles of "exp(ranef)"
expquants <- quantile(exp(ranef(m1)$id$'(Intercept)'), p=seq(0.1, 0.9, 0.05))
time <- 0:11

# probability depending on quantile of b
indv <- sapply(expquants, function(q){q * exp(beta0 + beta1 * time) / 
                                     (1 + q * exp(beta0 + beta1 * time))})

#pdf("interpretationbeta.pdf")
par(mfrow=c(1,1))
plot(time, exp(beta0 + beta1 * time) / (1 + exp(beta0 + beta1 * time)), 
     type ='l', ylim = c(0:1), lwd = 2,
     ylab = 'P(y = 1 | x, beta, b)', xlab = 'month')
for (q in 1:length(expquants)){
  lines(time, indv[, q], type ='l', col = 2)  
}
lines(time, apply(indv, 1, mean), col = 1, lwd = 2)
#legend("topright", fill=c(2, "black",  1), legend=c("P over quantiles of b", "cond. effect",  "marg. effect"))
#dev.off()