library(lattice)
library(nlme)

################################################################################
# Exercise 2
# Mixed effects model with random intercept
################################################################################

# Erzeuge Daten fuer 100m-Lauf
set.seed(123)
n1 = 10
n2 = n1
n = n1 + n2
M = 3
b = rep(rnorm(n1 + n2, 0, 0.5), each = M)
hist(b)
sex = c(rep(0,n1), rep(1,n2))
beta = c(rep(10.6, n1 * M), rep(11.9, n2 * M))
eps = rnorm(n, 0, 0.2)
times = beta + b + eps
athlete = rep(1:n, each = M)

plot(times ~ athlete, col = athlete, xlab = "LaeuferIn", ylab = "Zeit [sek]")
abline(v=10.5)

#sprint <- data.frame(athlete, sex=rep(sex, each=M), times)
sprint <- data.frame(athlete=factor(athlete), sex=factor(rep(sex, each=M)), times)

str(sprint)

# have a look at the data
plot(sprint)

###### (2a)

# sprint0 <- groupedData(times~1|athlete, data=sprint, outer=~sex, 
#             labels=list(x="", y="time"), units=list(x="", y="sec"))

sprint0 <- groupedData(times ~ 1|athlete, data=sprint, outer=~sex)

# outer: Faktoren, wie zB Geschlecht oder Behandlungsgruppe, 
# die invariant sind bezueglich der subjektspezifischen Beobachtungen
# bei mehreren Faktoren zB: outer=sex*nation oder outer=nation*sex

# inner: Faktoren, die sich innerhalb der Gruppierungsstruktur aendern 
# und als feste Effekte modelliert werden sollen; 
# hier zB Art der Diaet, die der Soprtler am Tag des Laufes bekommen hat
# wenn sich die Diaet ueber die Tage und die Sportler aendert.

# wenn man Faktoren, die sich innerhalb der Gruppierungsvariable aendern als 
# zufaellige Effekte modellieren moechte, sollte man sie als genestete 
# Gruppierungsstruktur angeben;
# hier zB Info auf welcher Bahn der Lauf stattfindet, wenn es verschiedenen Bahnen gibt,
# die von den Sportlern verwendet werden.

## verschiedene Mgl um ein groupedData-Objekt zu plotten
## ?plot.nmGroupedData
plot(sprint0, outer=~sex)
plot(sprint0, outer=TRUE) # use the specification of outer in the groupedData-object
plot(sprint0, inner=~athlete)
plot(sprint0, inner=~athlete, outer=~sex)

plot.design(sprint0)

###### (2b)
# fit the model with ML
sprintML <- lme(times ~  sex, random=~1|athlete, data=sprint, method="ML")
summary(sprintML)
# fit the model with REML
sprintREML <- lme(times ~  sex, random=~1|athlete, data=sprint, method="REML")
summary(sprintREML)

# compare variance components
getVarCov(sprintML) # kleinere geschaetze Varianzen
getVarCov(sprintREML)

# compare fixed effects
fixef(sprintML)
fixef(sprintREML)
# sind i.A. nicht gleich

# compare random effects
cbind("ML"=ranef(sprintML), "REML"=ranef(sprintREML))


###### (2c)
# compute fixed effects by hand

# design matrices and response 
X <- cbind(rep(1, 60), as.numeric(levels(sprint$sex)[sprint$sex]) )
Z <- kronecker(diag(20), rep(1,3))
y <- sprint$times

# Covariance matrices
R <- diag(rep(sprintML$sigma^2, 60)) # covariance of residuals
G <- diag(rep(getVarCov(sprintML), 20)) # covariance of random effects
V <- Z %*% G %*% t(Z) + R # covariance of response

# estimate beta
hatBeta <- solve(t(X) %*% solve(V) %*% X) %*% t(X) %*% solve(V) %*% y

# compare estimates
hatBeta
fixef(sprintML)


###### (2d)
# Plot the original data
with(sprint, plot(times ~ athlete, xlab = "LaeuferIn", ylab = "Zeit [sek]"))
# add predictions of fixed effects
with(sprint, points(predict(sprintREML, level=0)~athlete, pch=2, col=3, lwd=2))
# add predictions of fixed and random effects
with(sprint, points(predict(sprintREML)~athlete, pch=3, col=4, lwd=2))
legend("bottomright", legend=c("observed", "fixef", "fixef+ranef"), pch=1:3, col=c(1,3,4))


###### (2e)
# extra: nation
# -> nested design: athlete %in% nation
nationAthlete <- sample(1:4, 20, replace=TRUE)
nation <- rep(nationAthlete, each=M)
sprint$nation <- factor(nation, levels=1:4, labels=c("A","B","C","D"))

with(sprint, plot(times ~ athlete, col = NULL, border= nationAthlete, 
                  xlab = "LaeuferIn", ylab = "Zeit [sek]"))
abline(v=10.5)
with(sprint, plot(times ~ nation, xlab = "Nation", ylab = "Zeit [sek]", border= nationAthlete))

fm2sprint <- lme(times ~  sex, random=~1|nation/athlete, data=sprint)
summary(fm2sprint)

fixef(fm2sprint)
ranef(fm2sprint)


# ###### (2f)
# ###### Compare fixed and random effects
# # fit a model with fixed effects for persons
# sprintFixed <- lm(times ~ -1 + athlete, data=sprint)
# 
# # compare estimates with those of the model with fixed sex and random effect per person
# # the results are very similar 
# cbind("Fixed"=predict(sprintFixed), "Fixed+Random"=predict(sprintREML))
# predict(sprintFixed)-predict(sprintREML)
# 
# # Plot the original data
# with(sprint, plot(times ~ athlete, xlab = "LaeuferIn", ylab = "Zeit [sek]"))
# # add predictions of fixed effects
# with(sprint, points(predict(sprintREML, level=0)~athlete, pch=2, col=3, lwd=2))
# # add predictions of fixed and random effects
# with(sprint, points(predict(sprintREML)~athlete, pch=3, col=4, lwd=2))
# # add predictions of model with fixed effects for persons 
# with(sprint, points(predict(sprintFixed)~athlete, pch=4, col=5, lwd=2))
# legend("bottomright", legend=c("observed", "fixef", "fixef+ranef", "fixef+fixef"), pch=1:4, col=c(1,3,4,5))



################################################################################
# data(package="nlme")

# Two data examples of the book
# Pinheiro, J. C.; Bates, D. M. (2000). Mixed-Effects Models in S and S-PLUS. 
# Springer, New York.

################################################################################
# Exercise 3
# Mixed-Effects Model for Replicated, Blocked Designs
################################################################################

##### have a look at the data
?Machines

# Details
# Data on an experiment to compare three brands of machines used in an industrial 
# process are presented in Milliken and Johnson (p. 285, 1992). Six workers were 
# chosen randomly among the employees of a factory to operate each machine three times. 
# The response is an overall productivity score taking into account the number and quality 
# of components produced.

# Machines data ist als groupedData-Object gespeichert
class(Machines)
?groupedData

# Plot the mean productivity scores for each worker over the 3 machines
with(Machines, interaction.plot(Machine, Worker, score, las=1))

# with(Machines, boxplot(score ~ Worker))
# with(Machines, boxplot(score ~ Machine))
# with(Machines, boxplot(score ~ Machine + Worker))
# with(Machines, boxplot(score ~ Worker + Machine))

pdf("Machines.pdf")
with(Machines, plot.default(score ~ as.numeric(levels(Worker)[Worker]), main="Machines",
                            pch=as.numeric(Machine), xlab="Worker", ylab="score"))
legend("bottomleft", title="Machine", legend=c("A","B","C"), pch=1:3)
dev.off()

with(Machines, boxplot(score ~ Worker + Machine, las=2))


################

# # model with fixed effects for the type of machine
# fm0Machine <- lme(score ~ Machine, data=Machines)
# summary(fm0Machine)

###### (3a)
# random intercept for each worker
fm1Machine <- lme(score ~  - 1 + Machine, random=~1|Worker, data=Machines)
summary(fm1Machine)
# plot of residuals
plot(fm1Machine)
#plot(fm1Machine, cex=2, pch=16)
fixef(fm1Machine)
ranef(fm1Machine)


###### (3b)
## generate an interaction plot: 
## averaging the scores for each worker on each machine type, 
## plot these averages against the machine type, 
## and join the points for each worker. 
with(Machines, interaction.plot(Machine, Worker, score, las=1))
## if there were no interactions, the lines in the plots would be 
## approximately parallel 

# ## first possibility for interaction 
# # add random interaction term for machine within each worker
# fm2Machine <- lme(score ~ - 1 + Machine, random=~1|Worker/Machine, data=Machines)
# summary(fm2Machine)
# plot(fm2Machine)
# fixef(fm2Machine)
# ranef(fm2Machine)

## second possibility for interaction 
# add correlation between random interactions, 
# do not model random effects for Worker
fm3Machine <- lme(score ~  - 1 + Machine, random=~Machine-1|Worker, data=Machines)
summary(fm3Machine)
plot(fm1Machine)
fixef(fm3Machine)
ranef(fm3Machine)

# get covariance matrix of b_i
(D <- getVarCov(fm3Machine))
cov2cor(D) # get correlation

### extra!
# plot of orgininal data and predictions of model (a) and model (b)
with(Machines, plot.default(score ~ Worker, pch=as.numeric(Machine)))
legend("topleft", legend=c("Machine A","Machine B","Machine C"), pch=1:3)
with(Machines, points(predict(fm1Machine) ~ Worker, pch=as.numeric(Machine), col=3, lwd=2))
with(Machines, points(predict(fm3Machine) ~ Worker, pch=as.numeric(Machine), col=4, lwd=2))
legend("bottomright", legend=c("mod (a)","mod (b)"), fil=3:4)

# have a look at different parametrizations, at the example of worker 1
Machine1 <- Machines[Machines$Worker=="1",]
model.matrix(score ~ Machine, data=Machine1) # with intercept
model.matrix(score ~ - 1 + Machine, data=Machine1) # without intercept


###### (3c)
# delete some observations to obtain unbalanced data
dim(Machines)
MachinesUnb <- Machines[sort(sample(1:54, 44)), ] 

fm1MachineUnb <- lme(score ~  - 1 + Machine, random=~1|Worker, data=MachinesUnb)
summary(fm1MachineUnb)

fm3MachineUnb <- lme(score ~  - 1 + Machine, random=~Machine-1|Worker, data=MachinesUnb)
summary(fm3MachineUnb)

# fixed effects in balanced design
fixef(fm1Machine)
fixef(fm3Machine)

# fixed effects in unbalanced design
fixef(fm1MachineUnb)
fixef(fm3MachineUnb)

# Bei balancierten Daten werden die festen Effekte immer auf die gleichen Werte 
# geschaetzt unabhaengig von der Spezifizierung der zufaelligen Effekte. 
# Im unbalancierten Design aendern sich die Schaetzer der festen Effekte, 
# je nachdem welche zufaelligen Effekte spezifiziert werden.


################################################################################
# Exercise 4
# Oxide: nested design
################################################################################

##### have a look at the data
# library(nlme)
?Oxide
plot(Oxide, inner=~Lot) # same color per Lot


###### (4a)
## LMM mit zufaelligen Effekten fuer Scheiben (Wafer) in Chargen (Lot)  
fm1Oxide <- lme( Thickness ~ 1, random = ~ 1 | Lot/Wafer, data=Oxide )
summary(fm1Oxide)

## feste und zufaellige Effekte
fixef(fm1Oxide)
ranef(fm1Oxide)

###### (4b)
plot(fm1Oxide)
# oder von Hand
plot(resid(fm1Oxide), col=Oxide$Lot, pch=as.numeric(Oxide$Wafer))
abline(h=0)


################################################################################
# Exercise 5
# Blutdruck ueber die Dosis
################################################################################


### Simulate data ###
set.seed(123)
Ji <- rep(6,30)
n <- sum(Ji)
I <- length(Ji)
ID <- rep(1:I, Ji)
gender <- 1*(ID>15)
dosis <- rep(c(10, 20, 40, 80, 120, 160), I)
RI <- rnorm(I, 10)
RS <- rnorm(I, sd = 0.09)
eps <- rnorm(n, sd = 2)

fun <- function(x){
  -2*sqrt(x) - x/20
}

SBD <- 150 + fun(dosis) - 2 * gender - dosis * gender/100 + 
  RI[ID] + RS[ID] * dosis + eps

blutdruck <- data.frame(ID=ID, gender = gender, dosis = dosis, SBD = SBD)

### Plot Data ###
pdf("blutdruck.pdf")
plot(0, xlim = c(10, 160), ylim = c(min(SBD), max(SBD)),  xlab = "Dosis [mg]", 
     ylab = "Systolischer Blutdruck [mmHg]", type = "n", cex.axis = 1.3, cex.lab = 1.3)
for (i in 1:I){
  data1 <- blutdruck[(ID == i),]
  points(data1$dosis, data1$SBD, pch = data1$gender)
  lines(data1$dosis, data1$SBD)  ##, lty = data1$gender + 1
}
dev.off()



###### (5a)
## Modell mit festen Effekten
lm <- lm(SBD ~ gender + dosis + gender * dosis, data=blutdruck)
summary(lm)


###### (5b) 
## Modell mit random intercept
lmm1 <- lme(SBD ~ gender + dosis + gender * dosis, 
            random = ~ 1 |ID, data=blutdruck)
summary(lmm1)


###### extra: 
## compare estimates of fixed effects
cbind(coef(lm), fixef(lmm1))
## compare variances of estimated of fixed effects
vcov(lm)
vcov(lmm1)


###### (5c) 
### Gemischtes Modell mit zufaelligen Intercepts und zufaelligen Steigunge in dosis
lmm <- lme(SBD ~ gender + dosis + gender * dosis, 
           random = ~ 1 + dosis|ID, data=blutdruck)
lmm
summary(lmm)

# Kovarianzmatrix der Schaetzer der festen Effekte
lmm$varFix
vcov(lmm) ## ergibt das gleiche

# Kovarianzmatrix der zufaelligen Effekte
getVarCov(lmm)